Refactoring in preparation for per-channel h-values
author Erik Karlsson <erik.r.karlsson@gmail.com>
Thu, 5 Mar 2015 16:50:52 +0000 (17:50 +0100)
committer Erik Karlsson <erik.r.karlsson@gmail.com>
Thu, 5 Mar 2015 16:50:52 +0000 (17:50 +0100)
modules/photo/src/denoising.cpp
modules/photo/src/fast_nlmeans_denoising_invoker.hpp
modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp

index b41f83e..29899f7 100644 (file: modules/photo/src/denoising.cpp)
@@ -65,23 +65,23 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
     switch (src.type()) {
         case CV_8U:
             parallel_for_(cv::Range(0, src.rows),
-                    FastNlMeansDenoisingInvoker<uchar, int, unsigned, DistSquared>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<uchar, int, unsigned, DistSquared, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC2:
             parallel_for_(cv::Range(0, src.rows),
-                    FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned, DistSquared>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned, DistSquared, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC3:
             parallel_for_(cv::Range(0, src.rows),
-                    FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned, DistSquared>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned, DistSquared, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC4:
             parallel_for_(cv::Range(0, src.rows),
-                    FastNlMeansDenoisingInvoker<cv::Vec4b, int, unsigned, DistSquared>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec4b, int, unsigned, DistSquared, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         default:
             CV_Error(Error::StsBadArg,
@@ -104,43 +104,43 @@ void cv::fastNlMeansDenoisingAbs( InputArray _src, OutputArray _dst, float h,
     switch (src.type()) {
         case CV_8U:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<uchar, int, unsigned, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<uchar, int, unsigned, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC2:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec2b, int, unsigned, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC3:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec3b, int, unsigned, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC4:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec4b, int, unsigned, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec4b, int, unsigned, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16U:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<ushort, int64, uint64, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<ushort, int64, uint64, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC2:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC3:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC4:
             parallel_for_(cv::Range(0, src.rows),
-                FastNlMeansDenoisingInvoker<cv::Vec<ushort, 4>, int64, uint64, DistAbs>(
-                    src, dst, templateWindowSize, searchWindowSize, h));
+                    FastNlMeansDenoisingInvoker<cv::Vec<ushort, 4>, int64, uint64, DistAbs, int>(
+                    src, dst, templateWindowSize, searchWindowSize, &h));
             break;
         default:
             CV_Error(Error::StsBadArg,
@@ -239,27 +239,27 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
     {
         case CV_8U:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<uchar, int, unsigned, DistSquared>(
+                FastNlMeansMultiDenoisingInvoker<uchar, int, unsigned, DistSquared, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC2:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned, DistSquared>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned, DistSquared, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC3:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned, DistSquared>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned, DistSquared, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC4:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec4b, int, unsigned, DistSquared>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec4b, int, unsigned, DistSquared, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         default:
             CV_Error(Error::StsBadArg,
@@ -285,51 +285,51 @@ void cv::fastNlMeansDenoisingMultiAbs( InputArrayOfArrays _srcImgs, OutputArray
     {
         case CV_8U:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<uchar, int, unsigned, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<uchar, int, unsigned, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC2:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec2b, int, unsigned, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC3:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec3b, int, unsigned, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_8UC4:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec4b, int, unsigned, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec4b, int, unsigned, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16U:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<ushort, int64, uint64, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<ushort, int64, uint64, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC2:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 2>, int64, uint64, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC3:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 3>, int64, uint64, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         case CV_16UC4:
             parallel_for_(cv::Range(0, srcImgs[0].rows),
-                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 4>, int64, uint64, DistAbs>(
+                FastNlMeansMultiDenoisingInvoker<cv::Vec<ushort, 4>, int64, uint64, DistAbs, int>(
                     srcImgs, imgToDenoiseIndex, temporalWindowSize,
-                    dst, templateWindowSize, searchWindowSize, h));
+                    dst, templateWindowSize, searchWindowSize, &h));
             break;
         default:
             CV_Error(Error::StsBadArg,
index ec154fb..9dea2a0 100644 (file: modules/photo/src/fast_nlmeans_denoising_invoker.hpp)
 
 using namespace cv;
 
-template <typename T, typename IT, typename UIT, typename D>
+template <typename T, typename IT, typename UIT, typename D, typename WT>
 struct FastNlMeansDenoisingInvoker :
         public ParallelLoopBody
 {
 public:
     FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
-        int template_window_size, int search_window_size, const float h);
+        int template_window_size, int search_window_size, const float *h);
 
     void operator() (const Range& range) const;
 
@@ -77,7 +77,7 @@ private:
 
     int fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift_;
-    std::vector<int> almost_dist2weight_;
+    std::vector<WT> almost_dist2weight_;
 
     void calcDistSumsForFirstElementInRow(
         int i, Array2d<int>& dist_sums,
@@ -99,12 +99,12 @@ inline int getNearestPowerOf2(int value)
     return p;
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-FastNlMeansDenoisingInvoker<T, IT, UIT, D>::FastNlMeansDenoisingInvoker(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
     const Mat& src, Mat& dst,
     int template_window_size,
     int search_window_size,
-    const float h) :
+    const float *h) :
     src_(src), dst_(dst)
 {
     CV_Assert(src.channels() == pixelInfo<T>::channels);
@@ -133,25 +133,20 @@ FastNlMeansDenoisingInvoker<T, IT, UIT, D>::FastNlMeansDenoisingInvoker(
     int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
     almost_dist2weight_.resize(almost_max_dist);
 
-    const double WEIGHT_THRESHOLD = 0.001;
     for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
     {
         double dist = almost_dist * almost_dist2actual_dist_multiplier;
-        int weight = (int)round(fixed_point_mult_ * D::template calcWeight<T>(dist, h));
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
-
-        almost_dist2weight_[almost_dist] = weight;
+        almost_dist2weight_[almost_dist] =
+            D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
     }
-    CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
 
     // additional optimization init end
     if (dst_.empty())
         dst_ = Mat::zeros(src_.size(), src_.type());
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& range) const
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
 {
     int row_from = range.start;
     int row_to = range.end - 1;
@@ -228,9 +223,9 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& range)
             }
 
             // calc weights
-            IT estimation[pixelInfo<T>::channels], weights_sum = 0;
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<T>::channels];
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = 0;
+                estimation[channel_num] = weights_sum[channel_num] = 0;
 
             for (int y = 0; y < search_window_size_; y++)
             {
@@ -240,23 +235,23 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& range)
                 {
                     int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
                     int weight = almost_dist2weight_[almostAvgDist];
-                    weights_sum += (IT)weight;
-
                     T p = cur_row_ptr[border_size_ + search_window_x + x];
-                    incWithWeight<T, IT>(estimation, weight, p);
+                    incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
                 }
             }
 
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum/2) / weights_sum;
+                estimation[channel_num] =
+                    (static_cast<UIT>(estimation[channel_num]) + weights_sum[channel_num]/2) /
+                    weights_sum[channel_num];
 
             dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
         }
     }
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForFirstElementInRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
     int i,
     Array2d<int>& dist_sums,
     Array3d<int>& col_dist_sums,
@@ -289,8 +284,8 @@ inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForFirstElem
         }
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForElementInFirstRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
     int i, int j, int first_col_num,
     Array2d<int>& dist_sums,
     Array3d<int>& col_dist_sums,
index 4d66efe..53a6f5e 100644 (file: modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp)
@@ -122,6 +122,36 @@ class DistAbs
         }
     };
 
+    static const double WEIGHT_THRESHOLD = 0.001;
+    template <typename T, typename WT> struct calcWeight_
+    {
+        static inline WT f(double dist, const float *h, int fixed_point_mult)
+        {
+            WT weight = (WT)round(fixed_point_mult *
+                                  std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels)));
+            if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
+                weight = 0;
+            return weight;
+        }
+    };
+
+    template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
+    {
+        static inline Vec<ET, n> f(double dist, const float *h, int fixed_point_mult)
+        {
+            Vec<ET, n> res;
+            for (int i=0; i<n; i++)
+            {
+                ET weight = (ET)round(fixed_point_mult *
+                                      std::exp(-dist*dist / (h[i]*h[i] * pixelInfo<T>::channels)));
+                if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
+                    weight = 0;
+                res[i] = weight;
+            }
+            return res;
+        }
+    };
+
 public:
     template <typename T> static inline int calcDist(const T a, const T b)
     {
@@ -142,14 +172,14 @@ public:
         return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
     };
 
-    template <typename T>
-    static double calcWeight(double dist, double h)
+    template <typename T, typename WT>
+    static inline WT calcWeight(double dist, const float *h, int fixed_point_mult)
     {
-        return std::exp(-dist*dist / (h * h * pixelInfo<T>::channels));
+        return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
     }
 
     template <typename T>
-    static double maxDist()
+    static inline double maxDist()
     {
         return (int)pixelInfo<T>::sampleMax() * pixelInfo<T>::channels;
     }
@@ -217,6 +247,36 @@ class DistSquared
         }
     };
 
+    static const double WEIGHT_THRESHOLD = 0.001;
+    template <typename T, typename WT> struct calcWeight_
+    {
+        static inline WT f(double dist, const float *h, int fixed_point_mult)
+        {
+            WT weight = (WT)round(fixed_point_mult *
+                                  std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels)));
+            if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
+                weight = 0;
+            return weight;
+        }
+    };
+
+    template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
+    {
+        static inline Vec<ET, n> f(double dist, const float *h, int fixed_point_mult)
+        {
+            Vec<ET, n> res;
+            for (int i=0; i<n; i++)
+            {
+                ET weight = (ET)round(fixed_point_mult *
+                                      std::exp(-dist / (h[i]*h[i] * pixelInfo<T>::channels)));
+                if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
+                    weight = 0;
+                res[i] = weight;
+            }
+            return res;
+        }
+    };
+
 public:
     template <typename T> static inline int calcDist(const T a, const T b)
     {
@@ -237,62 +297,111 @@ public:
         return calcUpDownDist_<T>::f(a_up, a_down, b_up, b_down);
     };
 
-    template <typename T>
-    static double calcWeight(double dist, double h)
+    template <typename T, typename WT>
+    static inline WT calcWeight(double dist, const float *h, int fixed_point_mult)
     {
-        return std::exp(-dist / (h * h * pixelInfo<T>::channels));
+        return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
     }
 
     template <typename T>
-    static double maxDist()
+    static inline double maxDist()
     {
         return (int)pixelInfo<T>::sampleMax() * (int)pixelInfo<T>::sampleMax() *
             pixelInfo<T>::channels;
     }
 };
 
-template <typename T, typename IT> struct incWithWeight_
+template <typename T, typename IT, typename WT> struct incWithWeight_
 {
-    static inline void f(IT* estimation, int weight, T p)
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, T p)
     {
         estimation[0] += (IT)weight * p;
+        weights_sum[0] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT>
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT, int>
 {
-    static inline void f(IT* estimation, int weight, Vec<ET, 2> p)
+    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 2> p)
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
+        weights_sum[0] += (IT)weight;
+        weights_sum[1] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT>
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT, int>
 {
-    static inline void f(IT* estimation, int weight, Vec<ET, 3> p)
+    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 3> p)
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
         estimation[2] += (IT)weight * p[2];
+        weights_sum[0] += (IT)weight;
+        weights_sum[1] += (IT)weight;
+        weights_sum[2] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 4>, IT>
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 4>, IT, int>
 {
-    static inline void f(IT* estimation, int weight, Vec<ET, 4> p)
+    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 4> p)
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
         estimation[2] += (IT)weight * p[2];
         estimation[3] += (IT)weight * p[3];
+        weights_sum[0] += (IT)weight;
+        weights_sum[1] += (IT)weight;
+        weights_sum[2] += (IT)weight;
+        weights_sum[3] += (IT)weight;
+    }
+};
+
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT, Vec<int, 2> >
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 2> weight, Vec<ET, 2> p)
+    {
+        estimation[0] += (IT)weight[0] * p[0];
+        estimation[1] += (IT)weight[1] * p[1];
+        weights_sum[0] += (IT)weight[0];
+        weights_sum[1] += (IT)weight[1];
+    }
+};
+
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT, Vec<int, 3> >
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 3> weight, Vec<ET, 3> p)
+    {
+        estimation[0] += (IT)weight[0] * p[0];
+        estimation[1] += (IT)weight[1] * p[1];
+        estimation[2] += (IT)weight[2] * p[2];
+        weights_sum[0] += (IT)weight[0];
+        weights_sum[1] += (IT)weight[1];
+        weights_sum[2] += (IT)weight[2];
+    }
+};
+
+template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 4>, IT, Vec<int, 4> >
+{
+    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 4> weight, Vec<ET, 4> p)
+    {
+        estimation[0] += (IT)weight[0] * p[0];
+        estimation[1] += (IT)weight[1] * p[1];
+        estimation[2] += (IT)weight[2] * p[2];
+        estimation[3] += (IT)weight[3] * p[3];
+        weights_sum[0] += (IT)weight[0];
+        weights_sum[1] += (IT)weight[1];
+        weights_sum[2] += (IT)weight[2];
+        weights_sum[3] += (IT)weight[3];
     }
 };
 
-template <typename T, typename IT>
-static inline void incWithWeight(IT* estimation, IT weight, T p)
+template <typename T, typename IT, typename WT>
+static inline void incWithWeight(IT* estimation, IT* weights_sum, IT weight, T p)
 {
-    return incWithWeight_<T, IT>::f(estimation, weight, p);
+    return incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p);
 }
 
 template <typename T, typename IT> struct saturateCastFromArray_
index f9c1264..489ee67 100644 (file: modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp)
 
 using namespace cv;
 
-template <typename T, typename IT, typename UIT, typename D>
+template <typename T, typename IT, typename UIT, typename D, typename WT>
 struct FastNlMeansMultiDenoisingInvoker :
         ParallelLoopBody
 {
 public:
     FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
                                      int temporalWindowSize, Mat& dst, int template_window_size,
-                                     int search_window_size, const float h);
+                                     int search_window_size, const float *h);
 
     void operator() (const Range& range) const;
 
@@ -83,7 +83,7 @@ private:
 
     int fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift;
-    std::vector<int> almost_dist2weight;
+    std::vector<WT> almost_dist2weight;
 
     void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
                                           Array4d<int>& col_dist_sums,
@@ -94,15 +94,15 @@ private:
                                           Array4d<int>& up_col_dist_sums) const;
 };
 
-template <typename T, typename IT, typename UIT, typename D>
-FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::FastNlMeansMultiDenoisingInvoker(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
     const std::vector<Mat>& srcImgs,
     int imgToDenoiseIndex,
     int temporalWindowSize,
     cv::Mat& dst,
     int template_window_size,
     int search_window_size,
-    const float h) :
+    const float *h) :
         dst_(dst), extended_srcs_(srcImgs.size())
 {
     CV_Assert(srcImgs.size() > 0);
@@ -144,25 +144,20 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::FastNlMeansMultiDenoisingInvoke
     int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
     almost_dist2weight.resize(almost_max_dist);
 
-    const double WEIGHT_THRESHOLD = 0.001;
     for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
     {
         double dist = almost_dist * almost_dist2actual_dist_multiplier;
-        int weight = (int)round(fixed_point_mult_ * D::template calcWeight<T>(dist, h));
-        if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
-            weight = 0;
-
-        almost_dist2weight[almost_dist] = weight;
+        almost_dist2weight[almost_dist] =
+            D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
     }
-    CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
 
     // additional optimization init end
     if (dst_.empty())
         dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& range) const
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
 {
     int row_from = range.start;
     int row_to = range.end - 1;
@@ -248,11 +243,9 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& r
             }
 
             // calc weights
-            IT weights_sum = 0;
-
-            IT estimation[pixelInfo<T>::channels];
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<T>::channels];
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = 0;
+                estimation[channel_num] = weights_sum[channel_num] = 0;
 
             for (int d = 0; d < temporal_window_size_; d++)
             {
@@ -268,25 +261,24 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::operator() (const Range& r
                         int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
 
                         int weight =  almost_dist2weight[almostAvgDist];
-                        weights_sum += (IT)weight;
-
                         T p = cur_row_ptr[border_size_ + search_window_x + x];
-                        incWithWeight<T, IT>(estimation, weight, p);
+                        incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
                     }
                 }
             }
 
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = (static_cast<UIT>(estimation[channel_num]) + weights_sum / 2) / weights_sum;
+                estimation[channel_num] =
+                    (static_cast<UIT>(estimation[channel_num]) + weights_sum[channel_num] / 2) /
+                    weights_sum[channel_num];
 
             dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
-
         }
     }
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForFirstElementInRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
         int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
 {
     int j = 0;
@@ -326,8 +318,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForFirs
     }
 }
 
-template <typename T, typename IT, typename UIT, typename D>
-inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D>::calcDistSumsForElementInFirstRow(
+template <typename T, typename IT, typename UIT, typename D, typename WT>
+inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
     int i, int j, int first_col_num, Array3d<int>& dist_sums,
     Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
 {