implemented gpu::copyMakeBorder for all border modes

author Vladislav Vinogradov <no@email>

Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)

committer Vladislav Vinogradov <no@email>

Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)
author Vladislav Vinogradov <no@email>
Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)
committer Vladislav Vinogradov <no@email>
Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp

index ffbc0da..dbb1107 100644 (file)
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -662,8 +662,7 @@ namespace cv
          CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());\r
  \r
          //! copies 2D array to a larger destination array and pads borders with user-specifiable constant\r
-        //! supports CV_8UC1, CV_8UC4, CV_32SC1 and CV_32FC1 types\r
-        CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value = Scalar(), Stream& stream = Stream::Null());\r
+        CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null());\r
  \r
          //! computes the integral image\r
          //! sum will have CV_32S type, but will contain unsigned int values\r
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp

index b2acf4d..98e7edd 100644 (file)
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -460,13 +460,15 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation, rotate, testing::Combine(testing
      SANITY_CHECK(dst_host);\r
  }\r
  \r
-PERF_TEST_P(DevInfo_Size_MatType, copyMakeBorder, testing::Combine(testing::ValuesIn(devices()),\r
-                                                                         testing::Values(GPU_TYPICAL_MAT_SIZES), \r
-                                                                         testing::Values(CV_8UC1, CV_8UC4, CV_32SC1)))\r
+PERF_TEST_P(DevInfo_Size_MatType_BorderMode, copyMakeBorder, testing::Combine(testing::ValuesIn(devices()),\r
+                                                                              testing::Values(GPU_TYPICAL_MAT_SIZES), \r
+                                                                              testing::Values(CV_8UC1, CV_8UC4, CV_32FC1), \r
+                                                                              testing::Values((int)BORDER_REPLICATE, (int)BORDER_CONSTANT)))\r
  {\r
      DeviceInfo devInfo = std::tr1::get<0>(GetParam());\r
      Size size = std::tr1::get<1>(GetParam());\r
      int type = std::tr1::get<2>(GetParam());\r
+    int borderType = std::tr1::get<3>(GetParam());\r
  \r
      setDevice(devInfo.deviceID());\r
  \r
@@ -481,7 +483,7 @@ PERF_TEST_P(DevInfo_Size_MatType, copyMakeBorder, testing::Combine(testing::Valu
  \r
      SIMPLE_TEST_CYCLE()\r
      {\r
-        copyMakeBorder(src, dst, 5, 5, 5, 5);\r
+        copyMakeBorder(src, dst, 5, 5, 5, 5, borderType);\r
      }\r
  \r
      Mat dst_host(dst);\r
diff --git a/modules/gpu/perf/perf_utility.hpp b/modules/gpu/perf/perf_utility.hpp

index ec6b052..a57e367 100644 (file)
--- a/modules/gpu/perf/perf_utility.hpp
+++ b/modules/gpu/perf/perf_utility.hpp
@@ -49,6 +49,7 @@ typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, NormType> > DevInfo
  typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, NormType> > DevInfo_Size_MatType_NormType;\r
  typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int> > DevInfo_DescSize;\r
  typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_K_DescSize;\r
+typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, BorderMode> > DevInfo_Size_MatType_BorderMode;\r
  \r
  const cv::Size sz1800x1500 = cv::Size(1800, 1500);\r
  const cv::Size sz4700x3000 = cv::Size(4700, 3000);\r
diff --git a/modules/gpu/src/cuda/column_filter.cu b/modules/gpu/src/cuda/column_filter.cu

index 3c32769..b2eaa72 100644 (file)
--- a/modules/gpu/src/cuda/column_filter.cu
+++ b/modules/gpu/src/cuda/column_filter.cu
@@ -109,12 +109,6 @@ namespace cv { namespace gpu { namespace filters
  \r
          B<T> b(src.rows);\r
  \r
-        if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))\r
-        {\r
-            cv::gpu::error("linearColumnFilter: can't use specified border extrapolation, image is too small, "\r
-                           "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
-        }\r
-\r
          filter_krnls_column::linearColumnFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);\r
          cudaSafeCall( cudaGetLastError() );\r
  \r
diff --git a/modules/gpu/src/cuda/copy_make_border.cu b/modules/gpu/src/cuda/copy_make_border.cu

new file mode 100644 (file)

index 0000000..8d70393
--- /dev/null
+++ b/modules/gpu/src/cuda/copy_make_border.cu
@@ -0,0 +1,127 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+//  By downloading, copying, installing or using the software you agree to this license.\r
+//  If you do not agree to this license, do not download, install,\r
+//  copy or use the software.\r
+//\r
+//\r
+//                           License Agreement\r
+//                For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+//   * Redistribution's of source code must retain the above copyright notice,\r
+//     this list of conditions and the following disclaimer.\r
+//\r
+//   * Redistribution's in binary form must reproduce the above copyright notice,\r
+//     this list of conditions and the following disclaimer in the documentation\r
+//     and/or other materials provided with the distribution.\r
+//\r
+//   * The name of the copyright holders may not be used to endorse or promote products\r
+//     derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include "internal_shared.hpp"\r
+#include "opencv2/gpu/device/border_interpolate.hpp"\r
+\r
+using namespace cv::gpu;\r
+using namespace cv::gpu::device;\r
+\r
+namespace cv { namespace gpu { namespace imgproc\r
+{\r
+    template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)\r
+    {\r
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;\r
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;\r
+\r
+        if (x < dst.cols && y < dst.rows)\r
+            dst.ptr(y)[x] = src(y - top, x - left);\r
+    }\r
+\r
+    template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher\r
+    {\r
+        static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left, \r
+            const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)\r
+        {        \r
+            dim3 block(32, 8);\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+            B<T> brd(src.rows, src.cols, VecTraits<T>::make(borderValue));\r
+            BorderReader< PtrStep_<T>, B<T> > brdSrc(src, brd);\r
+\r
+            copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);\r
+            cudaSafeCall( cudaGetLastError() );\r
+\r
+            if (stream == 0)\r
+                cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+    };\r
+\r
+    template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, \r
+        const T* borderValue, cudaStream_t stream)\r
+    {\r
+        typedef typename TypeVec<T, cn>::vec_type vec_type;\r
+\r
+        typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);\r
+\r
+        static const caller_t callers[5] = \r
+        {\r
+            CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call, \r
+            CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call, \r
+            CopyMakeBorderDispatcher<BrdConstant, vec_type>::call, \r
+            CopyMakeBorderDispatcher<BrdReflect, vec_type>::call, \r
+            CopyMakeBorderDispatcher<BrdWrap, vec_type>::call \r
+        };\r
+\r
+        callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);\r
+    }\r
+\r
+    template void copyMakeBorder_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);\r
+    \r
+    //template void copyMakeBorder_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);\r
+    \r
+    template void copyMakeBorder_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);\r
+    \r
+    template void copyMakeBorder_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);\r
+    \r
+    //template void copyMakeBorder_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);\r
+    \r
+    template void copyMakeBorder_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);\r
+    //template void copyMakeBorder_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);\r
+    template void copyMakeBorder_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);\r
+}}}\r
diff --git a/modules/gpu/src/cuda/row_filter.cu b/modules/gpu/src/cuda/row_filter.cu

index aa40372..44de9ca 100644 (file)
--- a/modules/gpu/src/cuda/row_filter.cu
+++ b/modules/gpu/src/cuda/row_filter.cu
@@ -125,12 +125,6 @@ namespace cv { namespace gpu { namespace filters
          typedef typename filter_krnls_row::SmemType<T>::smem_t smem_t;\r
          B<smem_t> b(src.cols);\r
  \r
-        if (!b.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))\r
-        {\r
-            cv::gpu::error("linearRowFilter: can't use specified border extrapolation, image is too small, "\r
-                           "try bigger image or another border extrapolation mode", __FILE__, __LINE__);\r
-        }\r
-\r
          filter_krnls_row::linearRowFilter<ksize, T, D><<<grid, threads, 0, stream>>>(src, dst, anchor, b);\r
          cudaSafeCall( cudaGetLastError() );\r
  \r
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp

index 753cc4e..87d3b5d 100644 (file)
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -53,7 +53,7 @@ void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCrite
  void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
-void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
@@ -360,60 +360,99 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
  ////////////////////////////////////////////////////////////////////////\r
  // copyMakeBorder\r
  \r
-void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)\r
+namespace cv { namespace gpu {  namespace imgproc\r
  {\r
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
+    template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);\r
+}}}\r
  \r
-    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
+namespace\r
+{\r
+    template <typename T, int cn> void copyMakeBorder_caller(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)\r
+    {\r
+        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));\r
  \r
-    NppiSize srcsz;\r
-    srcsz.width  = src.cols;\r
-    srcsz.height = src.rows;\r
-    NppiSize dstsz;\r
-    dstsz.width  = dst.cols;\r
-    dstsz.height = dst.rows;\r
+        imgproc::copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);\r
+    }\r
+}\r
  \r
-    cudaStream_t stream = StreamAccessor::getStream(s);\r
+void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)\r
+{\r
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
  \r
-    NppStreamHandler h(stream);\r
+    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
+\r
+    cudaStream_t stream = StreamAccessor::getStream(s);\r
  \r
-    switch (src.type())\r
+    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))\r
      {\r
-    case CV_8UC1:\r
-        {\r
-            Npp8u nVal = static_cast<Npp8u>(value[0]);\r
-            nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_8UC4:\r
-        {\r
-            Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};\r
-            nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_32SC1:\r
-        {\r
-            Npp32s nVal = static_cast<Npp32s>(value[0]);\r
-            nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_32FC1:\r
+        NppiSize srcsz;\r
+        srcsz.width  = src.cols;\r
+        srcsz.height = src.rows;\r
+\r
+        NppiSize dstsz;\r
+        dstsz.width  = dst.cols;\r
+        dstsz.height = dst.rows;\r
+\r
+        NppStreamHandler h(stream);\r
+\r
+        switch (src.type())\r
          {\r
-            Npp32f val = static_cast<Npp32f>(value[0]);\r
-            Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
-            nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
+        case CV_8UC1:\r
+            {\r
+                Npp8u nVal = saturate_cast<Npp8u>(value[0]);\r
+                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_8UC4:\r
+            {\r
+                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};\r
+                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_32SC1:\r
+            {\r
+                Npp32s nVal = saturate_cast<Npp32s>(value[0]);\r
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_32FC1:\r
+            {\r
+                Npp32f val = saturate_cast<Npp32f>(value[0]);\r
+                Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
          }\r
-    default:\r
-        CV_Assert(!"Unsupported source type");\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
      }\r
+    else\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);\r
+        static const caller_t callers[6][4] = \r
+        {\r
+            {   copyMakeBorder_caller<uchar, 1>  , 0/*copyMakeBorder_caller<uchar, 2>*/ ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},\r
+            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},\r
+            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},\r
+            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},\r
+            {0/*copyMakeBorder_caller<int, 1>*/  , 0/*copyMakeBorder_caller<int, 2>*/   , 0/*copyMakeBorder_caller<int, 3>*/  , 0/*copyMakeBorder_caller<int, 4>*/},\r
+            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}\r
+        };\r
  \r
-    if (stream == 0)\r
-        cudaSafeCall( cudaDeviceSynchronize() );\r
+        caller_t func = callers[src.depth()][src.channels() - 1];\r
+        CV_Assert(func != 0);\r
+\r
+        int gpuBorderType;\r
+        CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
+\r
+        func(src, dst, top, left, gpuBorderType, value, stream);\r
+    }\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
diff --git a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp

index 1f226b9..2888a52 100644 (file)
--- a/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+++ b/modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
@@ -73,11 +73,6 @@ namespace cv { namespace gpu { namespace device
              return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int width;\r
          const D val;\r
      };\r
@@ -103,11 +98,6 @@ namespace cv { namespace gpu { namespace device
              return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int height;\r
          const D val;\r
      };\r
@@ -116,8 +106,7 @@ namespace cv { namespace gpu { namespace device
      {\r
          typedef D result_type;\r
  \r
-        __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : \r
-            height(height_), width(width_), val(val_) \r
+        __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_) \r
          {\r
          }\r
  \r
@@ -176,11 +165,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(data[idx_col(x)]);\r
          }\r
  \r
-        bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int last_col;\r
      };\r
  \r
@@ -221,11 +205,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));\r
          }\r
  \r
-        bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int last_row;\r
      };\r
  \r
@@ -233,15 +212,8 @@ namespace cv { namespace gpu { namespace device
      {\r
          typedef D result_type;\r
  \r
-        __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
-        template <typename U> \r
-        __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
+        __host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}\r
+        template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}\r
  \r
          __device__ __forceinline__ int idx_row_low(int y) const\r
          {\r
@@ -299,12 +271,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_col_low(int x) const\r
          {\r
-            return ::abs(x);\r
+            return ::abs(x) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col_high(int x) const \r
          {\r
-            return last_col - ::abs(last_col - x);\r
+            return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col(int x) const\r
@@ -327,11 +299,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(data[idx_col(x)]);\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return -last_col <= mini && maxi <= 2 * last_col;\r
-        }\r
-\r
          const int last_col;\r
      };\r
  \r
@@ -344,12 +311,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_row_low(int y) const\r
          {\r
-            return ::abs(y);\r
+            return ::abs(y) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row_high(int y) const \r
          {\r
-            return last_row - ::abs(last_row - y);\r
+            return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row(int y) const\r
@@ -372,11 +339,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return -last_row <= mini && maxi <= 2 * last_row;\r
-        }\r
-\r
          const int last_row;\r
      };\r
  \r
@@ -384,24 +346,17 @@ namespace cv { namespace gpu { namespace device
      {\r
          typedef D result_type;\r
  \r
-        __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
-        template <typename U> \r
-        __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
+        __host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}\r
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}\r
  \r
          __device__ __forceinline__ int idx_row_low(int y) const\r
          {\r
-            return ::abs(y);\r
+            return ::abs(y) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row_high(int y) const \r
          {\r
-            return last_row - ::abs(last_row - y);\r
+            return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row(int y) const\r
@@ -411,12 +366,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_col_low(int x) const\r
          {\r
-            return ::abs(x);\r
+            return ::abs(x) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col_high(int x) const \r
          {\r
-            return last_col - ::abs(last_col - x);\r
+            return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col(int x) const\r
@@ -450,12 +405,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_col_low(int x) const\r
          {\r
-            return ::abs(x) - (x < 0);\r
+            return (::abs(x) - (x < 0)) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col_high(int x) const \r
          {\r
-            return last_col - ::abs(last_col - x) + (x > last_col);\r
+            return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col(int x) const\r
@@ -478,11 +433,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(data[idx_col(x)]);\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return -last_col <= mini && maxi <= 2 * last_col;\r
-        }\r
-\r
          const int last_col;\r
      };\r
  \r
@@ -495,12 +445,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_row_low(int y) const\r
          {\r
-            return ::abs(y) - (y < 0);\r
+            return (::abs(y) - (y < 0)) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row_high(int y) const \r
          {\r
-            return last_row - ::abs(last_row - y) + (y > last_row);\r
+            return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row(int y) const\r
@@ -523,11 +473,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return -last_row <= mini && maxi <= 2 * last_row;\r
-        }\r
-\r
          const int last_row;\r
      };\r
  \r
@@ -535,24 +480,17 @@ namespace cv { namespace gpu { namespace device
      {\r
          typedef D result_type;\r
  \r
-        __host__ __device__ __forceinline__ BrdReflect(int height, int width) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
-        template <typename U> \r
-        __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : \r
-            last_row(height - 1), last_col(width - 1) \r
-        {\r
-        }\r
+        __host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}\r
+        template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}\r
  \r
          __device__ __forceinline__ int idx_row_low(int y) const\r
          {\r
-            return ::abs(y) - (y < 0);\r
+            return (::abs(y) - (y < 0)) % (last_row + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_row_high(int y) const \r
          {\r
-            return last_row - ::abs(last_row - y) + (y > last_row);\r
+            return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;\r
          }\r
  \r
          __device__ __forceinline__ int idx_row(int y) const\r
@@ -562,12 +500,12 @@ namespace cv { namespace gpu { namespace device
  \r
          __device__ __forceinline__ int idx_col_low(int x) const\r
          {\r
-            return ::abs(x) - (x < 0);\r
+            return (::abs(x) - (x < 0)) % (last_col + 1);\r
          }\r
  \r
          __device__ __forceinline__ int idx_col_high(int x) const \r
          {\r
-            return last_col - ::abs(last_col - x) + (x > last_col);\r
+            return /*::abs*/(last_col - ::abs(last_col - x) + (x > last_col)) /*% (last_col + 1)*/;\r
          }\r
  \r
          __device__ __forceinline__ int idx_col(int x) const\r
@@ -629,11 +567,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(data[idx_col(x)]);\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int width;\r
      };\r
  \r
@@ -674,11 +607,6 @@ namespace cv { namespace gpu { namespace device
              return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));\r
          }\r
  \r
-        __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const \r
-        {\r
-            return true;\r
-        }\r
-\r
          const int height;\r
      };\r
  \r
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp

index f476fbb..493f4a4 100644 (file)
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -262,10 +262,11 @@ INSTANTIATE_TEST_CASE_P
  ///////////////////////////////////////////////////////////////////////////////////////////////////////\r
  // copyMakeBorder\r
  \r
-struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >\r
+struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int> >\r
  {\r
      cv::gpu::DeviceInfo devInfo;\r
      int type;\r
+    int borderType;\r
  \r
      cv::Size size;\r
      cv::Mat src;\r
@@ -281,6 +282,7 @@ struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
      {\r
          devInfo = std::tr1::get<0>(GetParam());\r
          type = std::tr1::get<1>(GetParam());\r
+        borderType = std::tr1::get<2>(GetParam());\r
  \r
          cv::gpu::setDevice(devInfo.deviceID());\r
  \r
@@ -296,12 +298,16 @@ struct CopyMakeBorder : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceI
          right = rng.uniform(1, 10);\r
          val = cv::Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));\r
  \r
-        cv::copyMakeBorder(src, dst_gold, top, botton, left, right, cv::BORDER_CONSTANT, val);\r
+        cv::copyMakeBorder(src, dst_gold, top, botton, left, right, borderType, val);\r
      }\r
  };\r
  \r
  TEST_P(CopyMakeBorder, Accuracy)\r
  {\r
+    static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};\r
+\r
+    const char* borderTypeStr = borderTypes_str[borderType];\r
+\r
      PRINT_PARAM(devInfo);\r
      PRINT_TYPE(type);\r
      PRINT_PARAM(size);\r
@@ -309,6 +315,7 @@ TEST_P(CopyMakeBorder, Accuracy)
      PRINT_PARAM(botton);\r
      PRINT_PARAM(left);\r
      PRINT_PARAM(right);\r
+    PRINT_PARAM(borderTypeStr);\r
      PRINT_PARAM(val);\r
  \r
      cv::Mat dst;\r
@@ -316,7 +323,7 @@ TEST_P(CopyMakeBorder, Accuracy)
      ASSERT_NO_THROW(\r
          cv::gpu::GpuMat gpuRes;\r
  \r
-        cv::gpu::copyMakeBorder(cv::gpu::GpuMat(src), gpuRes, top, botton, left, right, val);\r
+        cv::gpu::copyMakeBorder(cv::gpu::GpuMat(src), gpuRes, top, botton, left, right, borderType, val);\r
  \r
          gpuRes.download(dst);\r
      );\r
@@ -326,7 +333,8 @@ TEST_P(CopyMakeBorder, Accuracy)
  \r
  INSTANTIATE_TEST_CASE_P(ImgProc, CopyMakeBorder, testing::Combine(\r
                          testing::ValuesIn(devices()), \r
-                        testing::Values(CV_8UC1, CV_8UC4, CV_32SC1)));\r
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_16SC1, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC3, CV_32FC4),\r
+                        testing::Values((int)cv::BORDER_REFLECT101, (int)cv::BORDER_REPLICATE, (int)cv::BORDER_CONSTANT, (int)cv::BORDER_REFLECT, (int)cv::BORDER_WRAP)));\r
  \r
  ///////////////////////////////////////////////////////////////////////////////////////////////////////\r
  // warpAffine & warpPerspective\r
author	Vladislav Vinogradov <no@email>
	Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)
committer	Vladislav Vinogradov <no@email>
	Wed, 21 Sep 2011 07:02:45 +0000 (07:02 +0000)
modules/gpu/include/opencv2/gpu/gpu.hpp		patch \| blob \| history
modules/gpu/perf/perf_imgproc.cpp		patch \| blob \| history
modules/gpu/perf/perf_utility.hpp		patch \| blob \| history
modules/gpu/src/cuda/column_filter.cu		patch \| blob \| history
modules/gpu/src/cuda/copy_make_border.cu	[new file with mode: 0644]	patch \| blob
modules/gpu/src/cuda/row_filter.cu		patch \| blob \| history
modules/gpu/src/imgproc.cpp		patch \| blob \| history
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp		patch \| blob \| history
modules/gpu/test/test_imgproc.cpp		patch \| blob \| history