From e647b7c7e8a15765f7a18ed496fd2313338b900f Mon Sep 17 00:00:00 2001 From: Erik Karlsson Date: Tue, 17 Feb 2015 23:08:36 +0100 Subject: [PATCH] Calculating almost_dist2weight at full size to avoid bounds checking --- .../photo/src/fast_nlmeans_denoising_invoker.hpp | 25 +++++++++------------- .../src/fast_nlmeans_multi_denoising_invoker.hpp | 25 +++++++++------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index cbf9d25..a641c99 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -128,22 +128,20 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq); double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq; - const double WEIGHT_THRESHOLD = 0.001; - const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - size_t almost_max_dist = 0; - while (true) + size_t almost_max_dist = (size_t)(max_dist / almost_dist2actual_dist_multiplier + 1); + almost_dist2weight_.resize(almost_max_dist); + + const double WEIGHT_THRESHOLD = 0.001; + for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { - double dist = almost_max_dist * almost_dist2actual_dist_multiplier; + double dist = almost_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) + weight = 0; - if (almost_max_dist >= almost_dist2weight_.size()) - almost_dist2weight_.resize(almost_max_dist + ALLOC_CHUNK); - - almost_dist2weight_[almost_max_dist++] = weight; + almost_dist2weight_[almost_dist] = weight; } - almost_dist2weight_.resize(almost_max_dist); CV_Assert(almost_dist2weight_[0] == fixed_point_mult_); // additional optimization init end @@ -157,8 +155,6 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co int row_from = range.start; int row_to = range.end - 1; - size_t almost_max_dist = almost_dist2weight_.size(); - // sums of cols anf rows for current pixel p Array2d dist_sums(search_window_size_, search_window_size_); @@ -242,8 +238,7 @@ void FastNlMeansDenoisingInvoker::operator() (const Range& range) co for (int x = 0; x < search_window_size_; x++) { size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_); - IT weight = - almostAvgDist < almost_max_dist ? almost_dist2weight_[almostAvgDist] : 0; + IT weight = almost_dist2weight_[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index f12a0ef..808b01f 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -139,22 +139,20 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift; double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq; - const double WEIGHT_THRESHOLD = 0.001; - const size_t ALLOC_CHUNK = 65536; IT max_dist = (IT)pixelInfo::sampleMax() * (IT)pixelInfo::channels; - size_t almost_max_dist = 0; - while (true) + int almost_max_dist = (int) (max_dist / almost_dist2actual_dist_multiplier + 1); + almost_dist2weight.resize(almost_max_dist); + + const double WEIGHT_THRESHOLD = 0.001; + for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++) { - double dist = almost_max_dist * almost_dist2actual_dist_multiplier; + double dist = almost_dist * almost_dist2actual_dist_multiplier; IT weight = (IT)round(fixed_point_mult_ * std::exp(-dist*dist / (h * h * pixelInfo::channels))); - if (weight < WEIGHT_THRESHOLD * fixed_point_mult_ || dist > max_dist) break; + if (weight < WEIGHT_THRESHOLD * fixed_point_mult_) + weight = 0; - if (almost_max_dist >= almost_dist2weight.size()) - almost_dist2weight.resize(almost_max_dist + ALLOC_CHUNK); - - almost_dist2weight[almost_max_dist++] = weight; + almost_dist2weight[almost_dist] = weight; } - almost_dist2weight.resize(almost_max_dist); CV_Assert(almost_dist2weight[0] == fixed_point_mult_); // additional optimization init end @@ -168,8 +166,6 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang int row_from = range.start; int row_to = range.end - 1; - size_t almost_max_dist = almost_dist2weight.size(); - Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); // for lazy calc optimization @@ -270,8 +266,7 @@ void FastNlMeansMultiDenoisingInvoker::operator() (const Range& rang { size_t almostAvgDist = (size_t)(dist_sums_row[x] >> almost_template_window_size_sq_bin_shift); - IT weight = - almostAvgDist < almost_max_dist ? almost_dist2weight[almostAvgDist] : 0; + IT weight = almost_dist2weight[almostAvgDist]; weights_sum += weight; T p = cur_row_ptr[border_size_ + search_window_x + x]; -- 2.7.4