Rect dst_roi_final_;
bool can_use_gpu_;
int weight_type_; //CV_32F or CV_16S
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
+ std::vector<cuda::GpuMat> gpu_dst_pyr_laplace_;
+ std::vector<cuda::GpuMat> gpu_dst_band_weights_;
+#endif
};
#include "precomp.hpp"
#include "opencl_kernels_stitching.hpp"
+#ifdef HAVE_CUDA
+ namespace cv { namespace cuda { namespace device
+ {
+ namespace blend
+ {
+ void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
+ PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc);
+ void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
+ PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc);
+ void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
+ const int width, const int height);
+ void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
+ const int width, const int height);
+ }
+ }}}
+#endif
+
namespace cv {
namespace detail {
Blender::prepare(dst_roi);
- dst_pyr_laplace_.resize(num_bands_ + 1);
- dst_pyr_laplace_[0] = dst_;
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
+ if (can_use_gpu_)
+ {
+ gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
+ gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
+ gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
- dst_band_weights_.resize(num_bands_ + 1);
- dst_band_weights_[0].create(dst_roi.size(), weight_type_);
- dst_band_weights_[0].setTo(0);
+ gpu_dst_band_weights_.resize(num_bands_ + 1);
+ gpu_dst_band_weights_[0].create(dst_roi.size(), weight_type_);
+ gpu_dst_band_weights_[0].setTo(0);
- for (int i = 1; i <= num_bands_; ++i)
+ for (int i = 1; i <= num_bands_; ++i)
+ {
+ gpu_dst_pyr_laplace_[i].create((gpu_dst_pyr_laplace_[i - 1].rows + 1) / 2,
+ (gpu_dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
+ gpu_dst_band_weights_[i].create((gpu_dst_band_weights_[i - 1].rows + 1) / 2,
+ (gpu_dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
+ gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
+ gpu_dst_band_weights_[i].setTo(0);
+ }
+ }
+ else
+#endif
{
- dst_pyr_laplace_[i].create((dst_pyr_laplace_[i - 1].rows + 1) / 2,
- (dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
- dst_band_weights_[i].create((dst_band_weights_[i - 1].rows + 1) / 2,
- (dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
- dst_pyr_laplace_[i].setTo(Scalar::all(0));
- dst_band_weights_[i].setTo(0);
+ dst_pyr_laplace_.resize(num_bands_ + 1);
+ dst_pyr_laplace_[0] = dst_;
+
+ dst_band_weights_.resize(num_bands_ + 1);
+ dst_band_weights_[0].create(dst_roi.size(), weight_type_);
+ dst_band_weights_[0].setTo(0);
+
+ for (int i = 1; i <= num_bands_; ++i)
+ {
+ dst_pyr_laplace_[i].create((dst_pyr_laplace_[i - 1].rows + 1) / 2,
+ (dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
+ dst_band_weights_[i].create((dst_band_weights_[i - 1].rows + 1) / 2,
+ (dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
+ dst_pyr_laplace_[i].setTo(Scalar::all(0));
+ dst_band_weights_[i].setTo(0);
+ }
}
}
int bottom = br_new.y - tl.y - img.rows;
int right = br_new.x - tl.x - img.cols;
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
+ if (can_use_gpu_)
+ {
+ // Create the source image Laplacian pyramid
+ cuda::GpuMat gpu_img;
+ gpu_img.upload(img);
+ cuda::GpuMat img_with_border;
+ cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
+ std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
+ img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
+ for (int i = 0; i < num_bands_; ++i)
+ cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]);
+ for (int i = 0; i < num_bands_; ++i)
+ {
+ cuda::GpuMat up;
+ cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up);
+ cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]);
+ }
+
+ // Create the weight map Gaussian pyramid
+ cuda::GpuMat gpu_mask;
+ gpu_mask.upload(mask);
+ cuda::GpuMat weight_map;
+ std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1);
+
+ if (weight_type_ == CV_32F)
+ {
+ gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.);
+ }
+ else // weight_type_ == CV_16S
+ {
+ gpu_mask.convertTo(weight_map, CV_16S);
+ cuda::GpuMat add_mask;
+ cuda::compare(gpu_mask, 0, add_mask, CMP_NE);
+ cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
+ }
+ cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
+ for (int i = 0; i < num_bands_; ++i)
+ cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]);
+
+ int y_tl = tl_new.y - dst_roi_.y;
+ int y_br = br_new.y - dst_roi_.y;
+ int x_tl = tl_new.x - dst_roi_.x;
+ int x_br = br_new.x - dst_roi_.x;
+
+ // Add weighted layer of the source image to the final Laplacian pyramid layer
+ for (int i = 0; i <= num_bands_; ++i)
+ {
+ Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
+ cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i];
+ cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
+ cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i];
+ cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
+
+ using namespace cv::cuda::device::blend;
+ if (weight_type_ == CV_32F)
+ {
+ addSrcWeightGpu32F(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
+ }
+ else
+ {
+ addSrcWeightGpu16S(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
+ }
+ x_tl /= 2; y_tl /= 2;
+ x_br /= 2; y_br /= 2;
+ }
+ return;
+ }
+#endif
+
// Create the source image Laplacian pyramid
UMat img_with_border;
copyMakeBorder(_img, img_with_border, top, bottom, left, right,
#endif
std::vector<UMat> src_pyr_laplace;
- if (can_use_gpu_ && img_with_border.depth() == CV_16S)
- createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace);
- else
- createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);
+ createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);
LOGLN(" Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
#if ENABLE_LOG
void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
{
- for (int i = 0; i <= num_bands_; ++i)
- normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
-
+ cv::UMat dst_band_weights_0;
+ Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_)
- restoreImageFromLaplacePyrGpu(dst_pyr_laplace_);
+ {
+ for (int i = 0; i <= num_bands_; ++i)
+ {
+ cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
+ cuda::GpuMat weight_i = gpu_dst_band_weights_[i];
+
+ using namespace ::cv::cuda::device::blend;
+ if (weight_type_ == CV_32F)
+ {
+ normalizeUsingWeightMapGpu32F(weight_i, dst_i, weight_i.cols, weight_i.rows);
+ }
+ else
+ {
+ normalizeUsingWeightMapGpu16S(weight_i, dst_i, weight_i.cols, weight_i.rows);
+ }
+ }
+
+ // Restore image from Laplacian pyramid
+ for (size_t i = num_bands_; i > 0; --i)
+ {
+ cuda::GpuMat up;
+ cuda::pyrUp(gpu_dst_pyr_laplace_[i], up);
+ cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]);
+ }
+
+ gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
+ gpu_dst_band_weights_[0].download(dst_band_weights_0);
+
+ gpu_dst_pyr_laplace_.clear();
+ gpu_dst_band_weights_.clear();
+ }
else
+#endif
+ {
+ for (int i = 0; i <= num_bands_; ++i)
+ normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
+
restoreImageFromLaplacePyr(dst_pyr_laplace_);
- Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
- dst_ = dst_pyr_laplace_[0](dst_rc);
- UMat _dst_mask;
- compare(dst_band_weights_[0](dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
- dst_pyr_laplace_.clear();
- dst_band_weights_.clear();
+ dst_ = dst_pyr_laplace_[0](dst_rc);
+ dst_band_weights_0 = dst_band_weights_[0];
+
+ dst_pyr_laplace_.clear();
+ dst_band_weights_.clear();
+ }
+
+ compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
Blender::blend(dst, dst_mask);
}
--- /dev/null
+#if !defined CUDA_DISABLER
+
+#include "opencv2/core/cuda/common.hpp"
+#include "opencv2/core/types.hpp"
+
+namespace cv { namespace cuda { namespace device
+{
+ namespace blend
+ {
+ __global__ void addSrcWeightKernel16S(const PtrStep<short> src, const PtrStep<short> src_weight,
+ PtrStep<short> dst, PtrStep<short> dst_weight, int rows, int cols)
+ {
+ int x = blockIdx.x * blockDim.x + threadIdx.x;
+ int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (y < rows && x < cols)
+ {
+ const short3 v = ((const short3*)src.ptr(y))[x];
+ short w = src_weight.ptr(y)[x];
+ ((short3*)dst.ptr(y))[x].x += short((v.x * w) >> 8);
+ ((short3*)dst.ptr(y))[x].y += short((v.y * w) >> 8);
+ ((short3*)dst.ptr(y))[x].z += short((v.z * w) >> 8);
+ dst_weight.ptr(y)[x] += w;
+ }
+ }
+
+ void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
+ PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc)
+ {
+ dim3 threads(16, 16);
+ dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
+ addSrcWeightKernel16S<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
+ cudaSafeCall(cudaGetLastError());
+ }
+
+ __global__ void addSrcWeightKernel32F(const PtrStep<short> src, const PtrStepf src_weight,
+ PtrStep<short> dst, PtrStepf dst_weight, int rows, int cols)
+ {
+ int x = blockIdx.x * blockDim.x + threadIdx.x;
+ int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+ if (y < rows && x < cols)
+ {
+ const short3 v = ((const short3*)src.ptr(y))[x];
+ float w = src_weight.ptr(y)[x];
+ ((short3*)dst.ptr(y))[x].x += static_cast<short>(v.x * w);
+ ((short3*)dst.ptr(y))[x].y += static_cast<short>(v.y * w);
+ ((short3*)dst.ptr(y))[x].z += static_cast<short>(v.z * w);
+ dst_weight.ptr(y)[x] += w;
+ }
+ }
+
+ void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
+ PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc)
+ {
+ dim3 threads(16, 16);
+ dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
+ addSrcWeightKernel32F<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
+ cudaSafeCall(cudaGetLastError());
+ }
+
+ __global__ void normalizeUsingWeightKernel16S(const PtrStep<short> weight, PtrStep<short> src,
+ const int width, const int height)
+ {
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < width && y < height)
+ {
+ const short3 v = ((short3*)src.ptr(y))[x];
+ short w = weight.ptr(y)[x];
+ ((short3*)src.ptr(y))[x] = make_short3(short((v.x << 8) / w),
+ short((v.y << 8) / w), short((v.z << 8) / w));
+ }
+ }
+
+ void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
+ const int width, const int height)
+ {
+ dim3 threads(16, 16);
+ dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
+ normalizeUsingWeightKernel16S<<<grid, threads>>> (weight, src, width, height);
+ }
+
+ __global__ void normalizeUsingWeightKernel32F(const PtrStepf weight, PtrStep<short> src,
+ const int width, const int height)
+ {
+ int x = (blockIdx.x * blockDim.x) + threadIdx.x;
+ int y = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+ if (x < width && y < height)
+ {
+ static const float WEIGHT_EPS = 1e-5f;
+ const short3 v = ((short3*)src.ptr(y))[x];
+ float w = weight.ptr(y)[x];
+ ((short3*)src.ptr(y))[x] = make_short3(static_cast<short>(v.x / (w + WEIGHT_EPS)),
+ static_cast<short>(v.y / (w + WEIGHT_EPS)),
+ static_cast<short>(v.z / (w + WEIGHT_EPS)));
+ }
+ }
+
+ void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
+ const int width, const int height)
+ {
+ dim3 threads(16, 16);
+ dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
+ normalizeUsingWeightKernel32F<<<grid, threads>>> (weight, src, width, height);
+ }
+ }
+}}}
+
+#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// Intel License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+#include "opencv2/ts/cuda_test.hpp"
+
+#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
+
+using namespace cv;
+using namespace std;
+
+namespace
+{
+ void multiBandBlend(const cv::Mat& im1, const cv::Mat& im2, const cv::Mat& mask1, const cv::Mat& mask2, cv::Mat& result, bool try_cuda)
+ {
+ detail::MultiBandBlender blender(try_cuda, 5);
+
+ blender.prepare(Rect(0, 0, max(im1.cols, im2.cols), max(im1.rows, im2.rows)));
+ blender.feed(im1, mask1, Point(0,0));
+ blender.feed(im2, mask2, Point(0,0));
+
+ Mat result_s, result_mask;
+ blender.blend(result_s, result_mask);
+ result_s.convertTo(result, CV_8U);
+ }
+}
+
+TEST(CUDA_MultiBandBlender, Accuracy)
+{
+ Mat image1 = imread(string(cvtest::TS::ptr()->get_data_path()) + "cv/shared/baboon.png");
+ Mat image2 = imread(string(cvtest::TS::ptr()->get_data_path()) + "cv/shared/lena.png");
+ ASSERT_EQ(image1.rows, image2.rows); ASSERT_EQ(image1.cols, image2.cols);
+
+ Mat image1s, image2s;
+ image1.convertTo(image1s, CV_16S);
+ image2.convertTo(image2s, CV_16S);
+
+ Mat mask1(image1s.size(), CV_8U);
+ mask1(Rect(0, 0, mask1.cols/2, mask1.rows)).setTo(255);
+ mask1(Rect(mask1.cols/2, 0, mask1.cols - mask1.cols/2, mask1.rows)).setTo(0);
+
+ Mat mask2(image2s.size(), CV_8U);
+ mask2(Rect(0, 0, mask2.cols/2, mask2.rows)).setTo(0);
+ mask2(Rect(mask2.cols/2, 0, mask2.cols - mask2.cols/2, mask2.rows)).setTo(255);
+
+ cv::Mat result;
+ multiBandBlend(image1s, image2s, mask1, mask2, result, false);
+
+ cv::Mat result_cuda;
+ multiBandBlend(image1s, image2s, mask1, mask2, result_cuda, true);
+
+ EXPECT_MAT_NEAR(result, result_cuda, 3);
+}
+
+#endif