modules/stitching/src/blenders.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                          License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44 #include "opencl_kernels_stitching.hpp"
  45
  46 #ifdef HAVE_CUDA
  47     namespace cv { namespace cuda { namespace device
  48     {
  49         namespace blend
  50         {
  51             void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
  52                                     PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc);
  53             void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
  54                                     PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc);
  55             void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
  56                                                const int width, const int height);
  57             void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
  58                                                const int width, const int height);
  59         }
  60     }}}
  61 #endif
  62
  63 namespace cv {
  64 namespace detail {
  65
  66 static const float WEIGHT_EPS = 1e-5f;
  67
  68 Ptr<Blender> Blender::createDefault(int type, bool try_gpu)
  69 {
  70     if (type == NO)
  71         return makePtr<Blender>();
  72     if (type == FEATHER)
  73         return makePtr<FeatherBlender>();
  74     if (type == MULTI_BAND)
  75         return makePtr<MultiBandBlender>(try_gpu);
  76     CV_Error(Error::StsBadArg, "unsupported blending method");
  77     return Ptr<Blender>();
  78 }
  79
  80
  81 void Blender::prepare(const std::vector<Point> &corners, const std::vector<Size> &sizes)
  82 {
  83     prepare(resultRoi(corners, sizes));
  84 }
  85
  86
  87 void Blender::prepare(Rect dst_roi)
  88 {
  89     dst_.create(dst_roi.size(), CV_16SC3);
  90     dst_.setTo(Scalar::all(0));
  91     dst_mask_.create(dst_roi.size(), CV_8U);
  92     dst_mask_.setTo(Scalar::all(0));
  93     dst_roi_ = dst_roi;
  94 }
  95
  96
  97 void Blender::feed(InputArray _img, InputArray _mask, Point tl)
  98 {
  99     Mat img = _img.getMat();
 100     Mat mask = _mask.getMat();
 101     Mat dst = dst_.getMat(ACCESS_RW);
 102     Mat dst_mask = dst_mask_.getMat(ACCESS_RW);
 103
 104     CV_Assert(img.type() == CV_16SC3);
 105     CV_Assert(mask.type() == CV_8U);
 106     int dx = tl.x - dst_roi_.x;
 107     int dy = tl.y - dst_roi_.y;
 108
 109     for (int y = 0; y < img.rows; ++y)
 110     {
 111         const Point3_<short> *src_row = img.ptr<Point3_<short> >(y);
 112         Point3_<short> *dst_row = dst.ptr<Point3_<short> >(dy + y);
 113         const uchar *mask_row = mask.ptr<uchar>(y);
 114         uchar *dst_mask_row = dst_mask.ptr<uchar>(dy + y);
 115
 116         for (int x = 0; x < img.cols; ++x)
 117         {
 118             if (mask_row[x])
 119                 dst_row[dx + x] = src_row[x];
 120             dst_mask_row[dx + x] |= mask_row[x];
 121         }
 122     }
 123 }
 124
 125
 126 void Blender::blend(InputOutputArray dst, InputOutputArray dst_mask)
 127 {
 128     UMat mask;
 129     compare(dst_mask_, 0, mask, CMP_EQ);
 130     dst_.setTo(Scalar::all(0), mask);
 131     dst.assign(dst_);
 132     dst_mask.assign(dst_mask_);
 133     dst_.release();
 134     dst_mask_.release();
 135 }
 136
 137
 138 void FeatherBlender::prepare(Rect dst_roi)
 139 {
 140     Blender::prepare(dst_roi);
 141     dst_weight_map_.create(dst_roi.size(), CV_32F);
 142     dst_weight_map_.setTo(0);
 143 }
 144
 145
 146 void FeatherBlender::feed(InputArray _img, InputArray mask, Point tl)
 147 {
 148     Mat img = _img.getMat();
 149     Mat dst = dst_.getMat(ACCESS_RW);
 150
 151     CV_Assert(img.type() == CV_16SC3);
 152     CV_Assert(mask.type() == CV_8U);
 153
 154     createWeightMap(mask, sharpness_, weight_map_);
 155     Mat weight_map = weight_map_.getMat(ACCESS_READ);
 156     Mat dst_weight_map = dst_weight_map_.getMat(ACCESS_RW);
 157
 158     int dx = tl.x - dst_roi_.x;
 159     int dy = tl.y - dst_roi_.y;
 160
 161     for (int y = 0; y < img.rows; ++y)
 162     {
 163         const Point3_<short>* src_row = img.ptr<Point3_<short> >(y);
 164         Point3_<short>* dst_row = dst.ptr<Point3_<short> >(dy + y);
 165         const float* weight_row = weight_map.ptr<float>(y);
 166         float* dst_weight_row = dst_weight_map.ptr<float>(dy + y);
 167
 168         for (int x = 0; x < img.cols; ++x)
 169         {
 170             dst_row[dx + x].x += static_cast<short>(src_row[x].x * weight_row[x]);
 171             dst_row[dx + x].y += static_cast<short>(src_row[x].y * weight_row[x]);
 172             dst_row[dx + x].z += static_cast<short>(src_row[x].z * weight_row[x]);
 173             dst_weight_row[dx + x] += weight_row[x];
 174         }
 175     }
 176 }
 177
 178
 179 void FeatherBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
 180 {
 181     normalizeUsingWeightMap(dst_weight_map_, dst_);
 182     compare(dst_weight_map_, WEIGHT_EPS, dst_mask_, CMP_GT);
 183     Blender::blend(dst, dst_mask);
 184 }
 185
 186
 187 Rect FeatherBlender::createWeightMaps(const std::vector<UMat> &masks, const std::vector<Point> &corners,
 188                                       std::vector<UMat> &weight_maps)
 189 {
 190     weight_maps.resize(masks.size());
 191     for (size_t i = 0; i < masks.size(); ++i)
 192         createWeightMap(masks[i], sharpness_, weight_maps[i]);
 193
 194     Rect dst_roi = resultRoi(corners, masks);
 195     Mat weights_sum(dst_roi.size(), CV_32F);
 196     weights_sum.setTo(0);
 197
 198     for (size_t i = 0; i < weight_maps.size(); ++i)
 199     {
 200         Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
 201                  weight_maps[i].cols, weight_maps[i].rows);
 202         add(weights_sum(roi), weight_maps[i], weights_sum(roi));
 203     }
 204
 205     for (size_t i = 0; i < weight_maps.size(); ++i)
 206     {
 207         Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
 208                  weight_maps[i].cols, weight_maps[i].rows);
 209         Mat tmp = weights_sum(roi);
 210         tmp.setTo(1, tmp < std::numeric_limits<float>::epsilon());
 211         divide(weight_maps[i], tmp, weight_maps[i]);
 212     }
 213
 214     return dst_roi;
 215 }
 216
 217
 218 MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
 219 {
 220     num_bands_ = 0;
 221     setNumBands(num_bands);
 222
 223 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 224     can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
 225 #else
 226     (void) try_gpu;
 227     can_use_gpu_ = false;
 228 #endif
 229
 230     CV_Assert(weight_type == CV_32F || weight_type == CV_16S);
 231     weight_type_ = weight_type;
 232 }
 233
 234
 235 void MultiBandBlender::prepare(Rect dst_roi)
 236 {
 237     dst_roi_final_ = dst_roi;
 238
 239     // Crop unnecessary bands
 240     double max_len = static_cast<double>(std::max(dst_roi.width, dst_roi.height));
 241     num_bands_ = std::min(actual_num_bands_, static_cast<int>(ceil(std::log(max_len) / std::log(2.0))));
 242
 243     // Add border to the final image, to ensure sizes are divided by (1 << num_bands_)
 244     dst_roi.width += ((1 << num_bands_) - dst_roi.width % (1 << num_bands_)) % (1 << num_bands_);
 245     dst_roi.height += ((1 << num_bands_) - dst_roi.height % (1 << num_bands_)) % (1 << num_bands_);
 246
 247     Blender::prepare(dst_roi);
 248
 249 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 250     if (can_use_gpu_)
 251     {
 252         gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
 253         gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
 254         gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
 255
 256         gpu_dst_band_weights_.resize(num_bands_ + 1);
 257         gpu_dst_band_weights_[0].create(dst_roi.size(), weight_type_);
 258         gpu_dst_band_weights_[0].setTo(0);
 259
 260         for (int i = 1; i <= num_bands_; ++i)
 261         {
 262             gpu_dst_pyr_laplace_[i].create((gpu_dst_pyr_laplace_[i - 1].rows + 1) / 2,
 263                 (gpu_dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
 264             gpu_dst_band_weights_[i].create((gpu_dst_band_weights_[i - 1].rows + 1) / 2,
 265                 (gpu_dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
 266             gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
 267             gpu_dst_band_weights_[i].setTo(0);
 268         }
 269     }
 270     else
 271 #endif
 272     {
 273         dst_pyr_laplace_.resize(num_bands_ + 1);
 274         dst_pyr_laplace_[0] = dst_;
 275
 276         dst_band_weights_.resize(num_bands_ + 1);
 277         dst_band_weights_[0].create(dst_roi.size(), weight_type_);
 278         dst_band_weights_[0].setTo(0);
 279
 280         for (int i = 1; i <= num_bands_; ++i)
 281         {
 282             dst_pyr_laplace_[i].create((dst_pyr_laplace_[i - 1].rows + 1) / 2,
 283                 (dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
 284             dst_band_weights_[i].create((dst_band_weights_[i - 1].rows + 1) / 2,
 285                 (dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
 286             dst_pyr_laplace_[i].setTo(Scalar::all(0));
 287             dst_band_weights_[i].setTo(0);
 288         }
 289     }
 290 }
 291
 292 #ifdef HAVE_OPENCL
 293 static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
 294         InputOutputArray _dst, InputOutputArray _dst_weight)
 295 {
 296     String buildOptions = "-D DEFINE_feed";
 297     ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
 298     ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
 299     ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
 300     ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);
 301     ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
 302     if (k.empty())
 303         return false;
 304
 305     UMat src = _src.getUMat();
 306
 307     k.args(ocl::KernelArg::ReadOnly(src),
 308            ocl::KernelArg::ReadOnly(_weight.getUMat()),
 309            ocl::KernelArg::ReadWrite(_dst.getUMat()),
 310            ocl::KernelArg::ReadWrite(_dst_weight.getUMat())
 311            );
 312
 313     size_t globalsize[2] = {(size_t)src.cols, (size_t)src.rows };
 314     return k.run(2, globalsize, NULL, false);
 315 }
 316 #endif
 317
 318 void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
 319 {
 320 #if ENABLE_LOG
 321     int64 t = getTickCount();
 322 #endif
 323
 324     UMat img = _img.getUMat();
 325     CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3);
 326     CV_Assert(mask.type() == CV_8U);
 327
 328     // Keep source image in memory with small border
 329     int gap = 3 * (1 << num_bands_);
 330     Point tl_new(std::max(dst_roi_.x, tl.x - gap),
 331                  std::max(dst_roi_.y, tl.y - gap));
 332     Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap),
 333                  std::min(dst_roi_.br().y, tl.y + img.rows + gap));
 334
 335     // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_).
 336     // After that scale between layers is exactly 2.
 337     //
 338     // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when
 339     // image is bordered to have size equal to the final image size, but this is too memory hungry approach.
 340     tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_);
 341     tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_);
 342     int width = br_new.x - tl_new.x;
 343     int height = br_new.y - tl_new.y;
 344     width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_);
 345     height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_);
 346     br_new.x = tl_new.x + width;
 347     br_new.y = tl_new.y + height;
 348     int dy = std::max(br_new.y - dst_roi_.br().y, 0);
 349     int dx = std::max(br_new.x - dst_roi_.br().x, 0);
 350     tl_new.x -= dx; br_new.x -= dx;
 351     tl_new.y -= dy; br_new.y -= dy;
 352
 353     int top = tl.y - tl_new.y;
 354     int left = tl.x - tl_new.x;
 355     int bottom = br_new.y - tl.y - img.rows;
 356     int right = br_new.x - tl.x - img.cols;
 357
 358 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 359     if (can_use_gpu_)
 360     {
 361         // Create the source image Laplacian pyramid
 362         cuda::GpuMat gpu_img;
 363         gpu_img.upload(img);
 364         cuda::GpuMat img_with_border;
 365         cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
 366         std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
 367         img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
 368         for (int i = 0; i < num_bands_; ++i)
 369             cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]);
 370         for (int i = 0; i < num_bands_; ++i)
 371         {
 372             cuda::GpuMat up;
 373             cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up);
 374             cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]);
 375         }
 376
 377         // Create the weight map Gaussian pyramid
 378         cuda::GpuMat gpu_mask;
 379         gpu_mask.upload(mask);
 380         cuda::GpuMat weight_map;
 381         std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1);
 382
 383         if (weight_type_ == CV_32F)
 384         {
 385             gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.);
 386         }
 387         else // weight_type_ == CV_16S
 388         {
 389             gpu_mask.convertTo(weight_map, CV_16S);
 390             cuda::GpuMat add_mask;
 391             cuda::compare(gpu_mask, 0, add_mask, CMP_NE);
 392             cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
 393         }
 394         cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
 395         for (int i = 0; i < num_bands_; ++i)
 396             cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]);
 397
 398         int y_tl = tl_new.y - dst_roi_.y;
 399         int y_br = br_new.y - dst_roi_.y;
 400         int x_tl = tl_new.x - dst_roi_.x;
 401         int x_br = br_new.x - dst_roi_.x;
 402
 403         // Add weighted layer of the source image to the final Laplacian pyramid layer
 404         for (int i = 0; i <= num_bands_; ++i)
 405         {
 406             Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
 407             cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i];
 408             cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
 409             cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i];
 410             cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
 411
 412             using namespace cv::cuda::device::blend;
 413             if (weight_type_ == CV_32F)
 414             {
 415                 addSrcWeightGpu32F(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
 416             }
 417             else
 418             {
 419                 addSrcWeightGpu16S(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
 420             }
 421             x_tl /= 2; y_tl /= 2;
 422             x_br /= 2; y_br /= 2;
 423         }
 424         return;
 425     }
 426 #endif
 427
 428     // Create the source image Laplacian pyramid
 429     UMat img_with_border;
 430     copyMakeBorder(_img, img_with_border, top, bottom, left, right,
 431                    BORDER_REFLECT);
 432     LOGLN("  Add border to the source image, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
 433 #if ENABLE_LOG
 434     t = getTickCount();
 435 #endif
 436
 437     std::vector<UMat> src_pyr_laplace;
 438     createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);
 439
 440     LOGLN("  Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
 441 #if ENABLE_LOG
 442     t = getTickCount();
 443 #endif
 444
 445     // Create the weight map Gaussian pyramid
 446     UMat weight_map;
 447     std::vector<UMat> weight_pyr_gauss(num_bands_ + 1);
 448
 449     if(weight_type_ == CV_32F)
 450     {
 451         mask.getUMat().convertTo(weight_map, CV_32F, 1./255.);
 452     }
 453     else // weight_type_ == CV_16S
 454     {
 455         mask.getUMat().convertTo(weight_map, CV_16S);
 456         UMat add_mask;
 457         compare(mask, 0, add_mask, CMP_NE);
 458         add(weight_map, Scalar::all(1), weight_map, add_mask);
 459     }
 460
 461     copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
 462
 463     for (int i = 0; i < num_bands_; ++i)
 464         pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]);
 465
 466     LOGLN("  Create the weight map Gaussian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
 467 #if ENABLE_LOG
 468     t = getTickCount();
 469 #endif
 470
 471     int y_tl = tl_new.y - dst_roi_.y;
 472     int y_br = br_new.y - dst_roi_.y;
 473     int x_tl = tl_new.x - dst_roi_.x;
 474     int x_br = br_new.x - dst_roi_.x;
 475
 476     // Add weighted layer of the source image to the final Laplacian pyramid layer
 477     for (int i = 0; i <= num_bands_; ++i)
 478     {
 479         Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
 480 #ifdef HAVE_OPENCL
 481         if ( !cv::ocl::isOpenCLActivated() ||
 482              !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i],
 483                     dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) )
 484 #endif
 485         {
 486             Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ);
 487             Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
 488             Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
 489             Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
 490             if(weight_type_ == CV_32F)
 491             {
 492                 for (int y = 0; y < rc.height; ++y)
 493                 {
 494                     const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
 495                     Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
 496                     const float* weight_row = _weight_pyr_gauss.ptr<float>(y);
 497                     float* dst_weight_row = _dst_band_weights.ptr<float>(y);
 498
 499                     for (int x = 0; x < rc.width; ++x)
 500                     {
 501                         dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]);
 502                         dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]);
 503                         dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]);
 504                         dst_weight_row[x] += weight_row[x];
 505                     }
 506                 }
 507             }
 508             else // weight_type_ == CV_16S
 509             {
 510                 for (int y = 0; y < y_br - y_tl; ++y)
 511                 {
 512                     const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
 513                     Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
 514                     const short* weight_row = _weight_pyr_gauss.ptr<short>(y);
 515                     short* dst_weight_row = _dst_band_weights.ptr<short>(y);
 516
 517                     for (int x = 0; x < x_br - x_tl; ++x)
 518                     {
 519                         dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8);
 520                         dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8);
 521                         dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8);
 522                         dst_weight_row[x] += weight_row[x];
 523                     }
 524                 }
 525             }
 526         }
 527 #ifdef HAVE_OPENCL
 528         else
 529         {
 530             CV_IMPL_ADD(CV_IMPL_OCL);
 531         }
 532 #endif
 533
 534         x_tl /= 2; y_tl /= 2;
 535         x_br /= 2; y_br /= 2;
 536     }
 537
 538     LOGLN("  Add weighted layer of the source image to the final Laplacian pyramid layer, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
 539 }
 540
 541
 542 void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
 543 {
 544     cv::UMat dst_band_weights_0;
 545     Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
 546 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 547     if (can_use_gpu_)
 548     {
 549         for (int i = 0; i <= num_bands_; ++i)
 550         {
 551             cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
 552             cuda::GpuMat weight_i = gpu_dst_band_weights_[i];
 553
 554             using namespace ::cv::cuda::device::blend;
 555             if (weight_type_ == CV_32F)
 556             {
 557                 normalizeUsingWeightMapGpu32F(weight_i, dst_i, weight_i.cols, weight_i.rows);
 558             }
 559             else
 560             {
 561                 normalizeUsingWeightMapGpu16S(weight_i, dst_i, weight_i.cols, weight_i.rows);
 562             }
 563         }
 564
 565         // Restore image from Laplacian pyramid
 566         for (size_t i = num_bands_; i > 0; --i)
 567         {
 568             cuda::GpuMat up;
 569             cuda::pyrUp(gpu_dst_pyr_laplace_[i], up);
 570             cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]);
 571         }
 572
 573         gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
 574         gpu_dst_band_weights_[0].download(dst_band_weights_0);
 575
 576         gpu_dst_pyr_laplace_.clear();
 577         gpu_dst_band_weights_.clear();
 578     }
 579     else
 580 #endif
 581     {
 582         for (int i = 0; i <= num_bands_; ++i)
 583             normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
 584
 585         restoreImageFromLaplacePyr(dst_pyr_laplace_);
 586
 587         dst_ = dst_pyr_laplace_[0](dst_rc);
 588         dst_band_weights_0 = dst_band_weights_[0];
 589
 590         dst_pyr_laplace_.clear();
 591         dst_band_weights_.clear();
 592     }
 593
 594     compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
 595
 596     Blender::blend(dst, dst_mask);
 597 }
 598
 599
 600 //////////////////////////////////////////////////////////////////////////////
 601 // Auxiliary functions
 602
 603 #ifdef HAVE_OPENCL
 604 static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _mat)
 605 {
 606     String buildOptions = "-D DEFINE_normalizeUsingWeightMap";
 607     ocl::buildOptionsAddMatrixDescription(buildOptions, "mat", _mat);
 608     ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
 609     ocl::Kernel k("normalizeUsingWeightMap", ocl::stitching::multibandblend_oclsrc, buildOptions);
 610     if (k.empty())
 611         return false;
 612
 613     UMat mat = _mat.getUMat();
 614
 615     k.args(ocl::KernelArg::ReadWrite(mat),
 616            ocl::KernelArg::ReadOnly(_weight.getUMat())
 617            );
 618
 619     size_t globalsize[2] = {(size_t)mat.cols, (size_t)mat.rows };
 620     return k.run(2, globalsize, NULL, false);
 621 }
 622 #endif
 623
 624 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
 625 {
 626     Mat src;
 627     Mat weight;
 628
 629 #ifdef HAVE_OPENCL
 630     if ( !cv::ocl::isOpenCLActivated() ||
 631             !ocl_normalizeUsingWeightMap(_weight, _src) )
 632 #endif
 633     {
 634         src = _src.getMat();
 635         weight = _weight.getMat();
 636
 637         CV_Assert(src.type() == CV_16SC3);
 638
 639         if (weight.type() == CV_32FC1)
 640         {
 641             for (int y = 0; y < src.rows; ++y)
 642             {
 643                 Point3_<short> *row = src.ptr<Point3_<short> >(y);
 644                 const float *weight_row = weight.ptr<float>(y);
 645
 646                 for (int x = 0; x < src.cols; ++x)
 647                 {
 648                     row[x].x = static_cast<short>(row[x].x / (weight_row[x] + WEIGHT_EPS));
 649                     row[x].y = static_cast<short>(row[x].y / (weight_row[x] + WEIGHT_EPS));
 650                     row[x].z = static_cast<short>(row[x].z / (weight_row[x] + WEIGHT_EPS));
 651                 }
 652             }
 653         }
 654         else
 655         {
 656             CV_Assert(weight.type() == CV_16SC1);
 657
 658             for (int y = 0; y < src.rows; ++y)
 659             {
 660                 const short *weight_row = weight.ptr<short>(y);
 661                 Point3_<short> *row = src.ptr<Point3_<short> >(y);
 662
 663                 for (int x = 0; x < src.cols; ++x)
 664                 {
 665                     int w = weight_row[x] + 1;
 666                     row[x].x = static_cast<short>((row[x].x << 8) / w);
 667                     row[x].y = static_cast<short>((row[x].y << 8) / w);
 668                     row[x].z = static_cast<short>((row[x].z << 8) / w);
 669                 }
 670             }
 671         }
 672     }
 673 #ifdef HAVE_OPENCL
 674     else
 675     {
 676         CV_IMPL_ADD(CV_IMPL_OCL);
 677     }
 678 #endif
 679 }
 680
 681
 682 void createWeightMap(InputArray mask, float sharpness, InputOutputArray weight)
 683 {
 684     CV_Assert(mask.type() == CV_8U);
 685     distanceTransform(mask, weight, DIST_L1, 3);
 686     UMat tmp;
 687     multiply(weight, sharpness, tmp);
 688     threshold(tmp, weight, 1.f, 1.f, THRESH_TRUNC);
 689 }
 690
 691
 692 void createLaplacePyr(InputArray img, int num_levels, std::vector<UMat> &pyr)
 693 {
 694     pyr.resize(num_levels + 1);
 695
 696     if(img.depth() == CV_8U)
 697     {
 698         if(num_levels == 0)
 699         {
 700             img.getUMat().convertTo(pyr[0], CV_16S);
 701             return;
 702         }
 703
 704         UMat downNext;
 705         UMat current = img.getUMat();
 706         pyrDown(img, downNext);
 707
 708         for(int i = 1; i < num_levels; ++i)
 709         {
 710             UMat lvl_up;
 711             UMat lvl_down;
 712
 713             pyrDown(downNext, lvl_down);
 714             pyrUp(downNext, lvl_up, current.size());
 715             subtract(current, lvl_up, pyr[i-1], noArray(), CV_16S);
 716
 717             current = downNext;
 718             downNext = lvl_down;
 719         }
 720
 721         {
 722             UMat lvl_up;
 723             pyrUp(downNext, lvl_up, current.size());
 724             subtract(current, lvl_up, pyr[num_levels-1], noArray(), CV_16S);
 725
 726             downNext.convertTo(pyr[num_levels], CV_16S);
 727         }
 728     }
 729     else
 730     {
 731         pyr[0] = img.getUMat();
 732         for (int i = 0; i < num_levels; ++i)
 733             pyrDown(pyr[i], pyr[i + 1]);
 734         UMat tmp;
 735         for (int i = 0; i < num_levels; ++i)
 736         {
 737             pyrUp(pyr[i + 1], tmp, pyr[i].size());
 738             subtract(pyr[i], tmp, pyr[i]);
 739         }
 740     }
 741 }
 742
 743
 744 void createLaplacePyrGpu(InputArray img, int num_levels, std::vector<UMat> &pyr)
 745 {
 746 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 747     pyr.resize(num_levels + 1);
 748
 749     std::vector<cuda::GpuMat> gpu_pyr(num_levels + 1);
 750     gpu_pyr[0].upload(img);
 751     for (int i = 0; i < num_levels; ++i)
 752         cuda::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
 753
 754     cuda::GpuMat tmp;
 755     for (int i = 0; i < num_levels; ++i)
 756     {
 757         cuda::pyrUp(gpu_pyr[i + 1], tmp);
 758         cuda::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
 759         gpu_pyr[i].download(pyr[i]);
 760     }
 761
 762     gpu_pyr[num_levels].download(pyr[num_levels]);
 763 #else
 764     (void)img;
 765     (void)num_levels;
 766     (void)pyr;
 767     CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
 768 #endif
 769 }
 770
 771
 772 void restoreImageFromLaplacePyr(std::vector<UMat> &pyr)
 773 {
 774     if (pyr.empty())
 775         return;
 776     UMat tmp;
 777     for (size_t i = pyr.size() - 1; i > 0; --i)
 778     {
 779         pyrUp(pyr[i], tmp, pyr[i - 1].size());
 780         add(tmp, pyr[i - 1], pyr[i - 1]);
 781     }
 782 }
 783
 784
 785 void restoreImageFromLaplacePyrGpu(std::vector<UMat> &pyr)
 786 {
 787 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
 788     if (pyr.empty())
 789         return;
 790
 791     std::vector<cuda::GpuMat> gpu_pyr(pyr.size());
 792     for (size_t i = 0; i < pyr.size(); ++i)
 793         gpu_pyr[i].upload(pyr[i]);
 794
 795     cuda::GpuMat tmp;
 796     for (size_t i = pyr.size() - 1; i > 0; --i)
 797     {
 798         cuda::pyrUp(gpu_pyr[i], tmp);
 799         cuda::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
 800     }
 801
 802     gpu_pyr[0].download(pyr[0]);
 803 #else
 804     (void)pyr;
 805     CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
 806 #endif
 807 }
 808
 809 } // namespace detail
 810 } // namespace cv