From 1e82a67cc4d082abe9437dd163314a543bd90232 Mon Sep 17 00:00:00 2001
From: Erik Karlsson <erik.r.karlsson@gmail.com>
Date: Fri, 6 Mar 2015 14:28:43 +0100
Subject: [PATCH] Additional refactoring

---
 .../photo/src/fast_nlmeans_denoising_invoker.hpp   | 17 ++--
 .../src/fast_nlmeans_denoising_invoker_commons.hpp | 91 +++++++++++++---------
 .../src/fast_nlmeans_multi_denoising_invoker.hpp   | 17 ++--
 3 files changed, 71 insertions(+), 54 deletions(-)
diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
index 9dea2a0..ff35550 100644
--- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp
@@ -75,7 +75,7 @@ private:
     int template_window_half_size_;
     int search_window_half_size_;
 
-    int fixed_point_mult_;
+    typename pixelInfo<WT>::sampleType fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift_;
     std::vector<WT> almost_dist2weight_;
 
@@ -120,7 +120,7 @@ FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
     const IT max_estimate_sum_value =
         (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
     fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
-                                          std::numeric_limits<int>::max());
+                                          pixelInfo<WT>::sampleMax());
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
@@ -223,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& ra
             }
 
             // calc weights
-            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<T>::channels];
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = weights_sum[channel_num] = 0;
+                estimation[channel_num] = 0;
+            for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
+                weights_sum[channel_num] = 0;
 
             for (int y = 0; y < search_window_size_; y++)
             {
@@ -240,11 +242,8 @@ void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& ra
                 }
             }
 
-            for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] =
-                    (static_cast<UIT>(estimation[channel_num]) + weights_sum[channel_num]/2) /
-                    weights_sum[channel_num];
-
+            divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
+                                                                                      weights_sum);
             dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
         }
     }
diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
index 53a6f5e..df8e470 100644
--- a/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
+++ b/modules/photo/src/fast_nlmeans_denoising_invoker_commons.hpp
@@ -122,11 +122,11 @@ class DistAbs
         }
     };
 
-    static const double WEIGHT_THRESHOLD = 0.001;
     template <typename T, typename WT> struct calcWeight_
     {
-        static inline WT f(double dist, const float *h, int fixed_point_mult)
+        static inline WT f(double dist, const float *h, WT fixed_point_mult)
         {
+            static const double WEIGHT_THRESHOLD = 0.001;
             WT weight = (WT)round(fixed_point_mult *
                                   std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels)));
             if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
@@ -137,17 +137,11 @@ class DistAbs
 
     template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
     {
-        static inline Vec<ET, n> f(double dist, const float *h, int fixed_point_mult)
+        static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult) // one multiplier for all n elements
         {
             Vec<ET, n> res;
             for (int i=0; i<n; i++)
-            {
-                ET weight = (ET)round(fixed_point_mult *
-                                      std::exp(-dist*dist / (h[i]*h[i] * pixelInfo<T>::channels)));
-                if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
-                    weight = 0;
-                res[i] = weight;
-            }
+                res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult); // &h[i]: callee sees h[i] as its h[0]
             return res;
         }
     };
@@ -247,11 +241,11 @@ class DistSquared
         }
     };
 
-    static const double WEIGHT_THRESHOLD = 0.001;
     template <typename T, typename WT> struct calcWeight_
     {
         static inline WT f(double dist, const float *h, int fixed_point_mult)
         {
+            static const double WEIGHT_THRESHOLD = 0.001;
             WT weight = (WT)round(fixed_point_mult *
                                   std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels)));
             if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
@@ -266,13 +260,7 @@ class DistSquared
         {
             Vec<ET, n> res;
             for (int i=0; i<n; i++)
-            {
-                ET weight = (ET)round(fixed_point_mult *
-                                      std::exp(-dist / (h[i]*h[i] * pixelInfo<T>::channels)));
-                if (weight < WEIGHT_THRESHOLD * fixed_point_mult)
-                    weight = 0;
-                res[i] = weight;
-            }
+                res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
             return res;
         }
     };
@@ -320,48 +308,42 @@ template <typename T, typename IT, typename WT> struct incWithWeight_
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT, int>
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 2>, IT, WT>
 {
-    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 2> p)
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 2> p) // one weight for both channels: only weights_sum[0] is accumulated
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
         weights_sum[0] += (IT)weight;
-        weights_sum[1] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT, int>
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 3>, IT, WT>
 {
-    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 3> p)
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 3> p) // one weight for all three channels: only weights_sum[0] is accumulated
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
         estimation[2] += (IT)weight * p[2];
         weights_sum[0] += (IT)weight;
-        weights_sum[1] += (IT)weight;
-        weights_sum[2] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 4>, IT, int>
+template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 4>, IT, WT>
 {
-    static inline void f(IT* estimation, IT* weights_sum, int weight, Vec<ET, 4> p)
+    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 4> p) // one weight for all four channels: only weights_sum[0] is accumulated
     {
         estimation[0] += (IT)weight * p[0];
         estimation[1] += (IT)weight * p[1];
         estimation[2] += (IT)weight * p[2];
         estimation[3] += (IT)weight * p[3];
         weights_sum[0] += (IT)weight;
-        weights_sum[1] += (IT)weight;
-        weights_sum[2] += (IT)weight;
-        weights_sum[3] += (IT)weight;
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT, Vec<int, 2> >
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 2>, IT, Vec<EW, 2> >
 {
-    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 2> weight, Vec<ET, 2> p)
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 2> weight, Vec<ET, 2> p)
     {
         estimation[0] += (IT)weight[0] * p[0];
         estimation[1] += (IT)weight[1] * p[1];
@@ -370,9 +352,9 @@ template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 2>, IT, Vec<in
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT, Vec<int, 3> >
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 3>, IT, Vec<EW, 3> >
 {
-    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 3> weight, Vec<ET, 3> p)
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 3> weight, Vec<ET, 3> p)
     {
         estimation[0] += (IT)weight[0] * p[0];
         estimation[1] += (IT)weight[1] * p[1];
@@ -383,9 +365,9 @@ template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 3>, IT, Vec<in
     }
 };
 
-template <typename ET, typename IT> struct incWithWeight_<Vec<ET, 4>, IT, Vec<int, 4> >
+template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 4>, IT, Vec<EW, 4> >
 {
-    static inline void f(IT* estimation, IT* weights_sum, Vec<int, 4> weight, Vec<ET, 4> p)
+    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 4> weight, Vec<ET, 4> p)
     {
         estimation[0] += (IT)weight[0] * p[0];
         estimation[1] += (IT)weight[1] * p[1];
@@ -404,6 +386,43 @@ static inline void incWithWeight(IT* estimation, IT* weights_sum, IT weight, T p
     return incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p);
 }
 
+template <typename IT, typename UIT, int nc, int nw> struct divByWeightsSum_ // nc: estimation entries, nw: weights_sum entries
+{
+    static inline void f(IT* estimation, IT* weights_sum); // declared only; bodies come from the partial specializations
+};
+
+template <typename IT, typename UIT> struct divByWeightsSum_<IT, UIT, 1, 1> // both <n, 1> and <n, n> match (1, 1); this form breaks the tie
+{
+    static inline void f(IT* estimation, IT* weights_sum) // in place: (UIT(estimation) + sum/2) / sum
+    {
+        estimation[0] = (static_cast<UIT>(estimation[0]) + weights_sum[0]/2) / weights_sum[0];
+    }
+};
+
+template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, 1> // n estimation entries share weights_sum[0]
+{
+    static inline void f(IT* estimation, IT* weights_sum) // in place; half of the sum is added before dividing
+    {
+        for (size_t i = 0; i < n; i++)
+            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[0]/2) / weights_sum[0];
+    }
+};
+
+template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, n> // entry i is divided by its own weights_sum[i]
+{
+    static inline void f(IT* estimation, IT* weights_sum) // in place; half of each sum is added before dividing
+    {
+        for (size_t i = 0; i < n; i++)
+            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[i]/2) / weights_sum[i];
+    }
+};
+
+template <typename IT, typename UIT, int nc, int nw>
+static inline void divByWeightsSum(IT* estimation, IT* weights_sum) // forwards to divByWeightsSum_<IT, UIT, nc, nw>::f
+{
+    return divByWeightsSum_<IT, UIT, nc, nw>::f(estimation, weights_sum);
+}
+
 template <typename T, typename IT> struct saturateCastFromArray_
 {
     static inline T f(IT* estimation)
diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
index 489ee67..cd3833a 100644
--- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
@@ -81,7 +81,7 @@ private:
     int search_window_half_size_;
     int temporal_window_half_size_;
 
-    int fixed_point_mult_;
+    typename pixelInfo<WT>::sampleType fixed_point_mult_;
     int almost_template_window_size_sq_bin_shift;
     std::vector<WT> almost_dist2weight;
 
@@ -128,7 +128,7 @@ FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingIn
     const IT max_estimate_sum_value =
         (IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
     fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
-                                          std::numeric_limits<int>::max());
+                                          pixelInfo<WT>::sampleMax());
 
     // precalc weight for every possible l2 dist between blocks
     // additional optimization of precalced weights to replace division(averaging) by binary shift
@@ -243,9 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Rang
             }
 
             // calc weights
-            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<T>::channels];
+            IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
             for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] = weights_sum[channel_num] = 0;
+                estimation[channel_num] = 0;
+            for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
+                weights_sum[channel_num] = 0;
 
             for (int d = 0; d < temporal_window_size_; d++)
             {
@@ -267,11 +269,8 @@ void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Rang
                 }
             }
 
-            for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
-                estimation[channel_num] =
-                    (static_cast<UIT>(estimation[channel_num]) + weights_sum[channel_num] / 2) /
-                    weights_sum[channel_num];
-
+            divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
+                                                                                      weights_sum);
             dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
         }
     }
-- 
2.7.4