implemented gpu::resize for all types

author Vladislav Vinogradov <no@email>

Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)

committer Vladislav Vinogradov <no@email>

Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)
author Vladislav Vinogradov <no@email>
Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)
committer Vladislav Vinogradov <no@email>
Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp

index 250c630..3c7dcc7 100644 (file)
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -633,8 +633,7 @@ namespace cv
          CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null());\r
  \r
          //! resizes the image\r
-        //! Supports INTER_NEAREST, INTER_LINEAR\r
-        //! supports CV_8UC1, CV_8UC4 types\r
+        //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
          CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null());\r
  \r
          //! warps the image using affine transformation\r
diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp

index 67e7466..f4d63d6 100644 (file)
--- a/modules/gpu/perf/perf_imgproc.cpp
+++ b/modules/gpu/perf/perf_imgproc.cpp
@@ -245,8 +245,8 @@ PERF_TEST_P(DevInfo_Size_MatType, threshold, testing::Combine(testing::ValuesIn(
  \r
  PERF_TEST_P(DevInfo_Size_MatType_Interpolation_SizeCoeff, resize, testing::Combine(testing::ValuesIn(devices()),\r
                                                                                     testing::Values(GPU_TYPICAL_MAT_SIZES), \r
-                                                                                   testing::Values(CV_8UC1, CV_8UC4),\r
-                                                                                   testing::Values((int)INTER_NEAREST, (int)INTER_LINEAR),\r
+                                                                                   testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_16UC1, CV_16UC3, CV_16UC4, CV_32FC1, CV_32FC3, CV_32FC4),\r
+                                                                                   testing::Values((int)INTER_NEAREST, (int)INTER_LINEAR, (int)INTER_CUBIC),\r
                                                                                     testing::Values(0.5, 2.0)))\r
  {\r
      DeviceInfo devInfo = std::tr1::get<0>(GetParam());\r
@@ -264,7 +264,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_SizeCoeff, resize, testing::Combi
      GpuMat src(src_host);\r
      GpuMat dst;\r
  \r
-    declare.time(0.5).iterations(100);\r
+    declare.time(1.0).iterations(100);\r
  \r
      SIMPLE_TEST_CYCLE()\r
      {\r
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu

index 44a54ca..d76f93b 100644 (file)
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -252,6 +252,222 @@ namespace cv { namespace gpu { namespace imgproc
      template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
      template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
  \r
+/////////////////////////////////// Resize ///////////////////////////////////////////////\r
+\r
+    template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst) /* filtered resize: each thread writes at most one dst element, reading src through its operator()(y, x) */\r
+    {\r
+        const int x = blockDim.x * blockIdx.x + threadIdx.x; /* dst column handled by this thread */\r
+        const int y = blockDim.y * blockIdx.y + threadIdx.y; /* dst row handled by this thread */\r
+\r
+        if (x < dst.cols && y < dst.rows) /* threads that fall past the dst edge do nothing */\r
+        {\r
+            const float xcoo = x / fx; /* dst column divided by fx gives the (fractional) source column */\r
+            const float ycoo = y / fy; /* dst row divided by fy gives the (fractional) source row */\r
+\r
+            dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo)); /* src is invoked with float (row, col); its result is saturate_cast to T */\r
+        }\r
+    }\r
+    template <typename Ptr2D, typename T> __global__ void resizeNN(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst) /* resize without filtering: src is read at integer coordinates and stored unconverted */\r
+    {\r
+        const int x = blockDim.x * blockIdx.x + threadIdx.x; /* dst column handled by this thread */\r
+        const int y = blockDim.y * blockIdx.y + threadIdx.y; /* dst row handled by this thread */\r
+\r
+        if (x < dst.cols && y < dst.rows) /* threads that fall past the dst edge do nothing */\r
+        {\r
+            const float xcoo = x / fx; /* dst column divided by fx gives the (fractional) source column */\r
+            const float ycoo = y / fy; /* dst row divided by fy gives the (fractional) source row */\r
+\r
+            dst.ptr(y)[x] = src(__float2int_rd(ycoo), __float2int_rd(xcoo)); /* __float2int_rd rounds down, so the source element at the floored (row, col) is copied */\r
+        }\r
+    }\r
+\r
+    template <template <typename> class Filter, typename T> struct ResizeDispatcherStream\r
+    {\r
+        static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)\r
+        {            \r
+            dim3 block(32, 8);\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+            BrdReplicate<T> brd(src.rows, src.cols);\r
+            BorderReader< PtrStep_<T>, BrdReplicate<T> > brdSrc(src, brd);\r
+            Filter< BorderReader< PtrStep_<T>, BrdReplicate<T> > > filter_src(brdSrc);\r
+\r
+            resize<<<grid, block, 0, stream>>>(filter_src, fx, fy, dst);\r
+            cudaSafeCall( cudaGetLastError() );\r
+        }\r
+    };\r
+    template <typename T> struct ResizeDispatcherStream<PointFilter, T>\r
+    {\r
+        static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)\r
+        {            \r
+            dim3 block(32, 8);\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+            BrdReplicate<T> brd(src.rows, src.cols);\r
+            BorderReader< PtrStep_<T>, BrdReplicate<T> > brdSrc(src, brd);\r
+\r
+            resizeNN<<<grid, block, 0, stream>>>(brdSrc, fx, fy, dst);\r
+            cudaSafeCall( cudaGetLastError() );\r
+        }\r
+    };\r
+    \r
+    template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream\r
+    {\r
+        static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)\r
+        {            \r
+            dim3 block(32, 8);\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+            BrdReplicate<T> brd(src.rows, src.cols);\r
+            BorderReader< PtrStep_<T>, BrdReplicate<T> > brdSrc(src, brd);\r
+            Filter< BorderReader< PtrStep_<T>, BrdReplicate<T> > > filter_src(brdSrc);\r
+\r
+            resize<<<grid, block>>>(filter_src, fx, fy, dst);\r
+            cudaSafeCall( cudaGetLastError() );\r
+\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+    };\r
+    template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>\r
+    {\r
+        static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)\r
+        {            \r
+            dim3 block(32, 8);\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));\r
+\r
+            BrdReplicate<T> brd(src.rows, src.cols);\r
+            BorderReader< PtrStep_<T>, BrdReplicate<T> > brdSrc(src, brd);\r
+\r
+            resizeNN<<<grid, block>>>(brdSrc, fx, fy, dst);\r
+            cudaSafeCall( cudaGetLastError() );\r
+\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
+        }\r
+    };\r
+\r
+#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \\r
+    texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \\r
+    struct tex_resize_ ## type ## _reader \\r
+    { \\r
+        typedef type elem_type; \\r
+        typedef int index_type; \\r
+        __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \\r
+        { \\r
+            return tex2D(tex_resize_ ## type , x, y); \\r
+        } \\r
+    }; \\r
+    template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type> \\r
+    { \\r
+        static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \\r
+        { \\r
+            dim3 block(32, 8); \\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
+            TextureBinder texHandler(&tex_resize_ ## type , src); \\r
+            tex_resize_ ## type ##_reader texSrc; \\r
+            Filter< tex_resize_ ## type ##_reader > filter_src(texSrc); \\r
+            resize<<<grid, block>>>(filter_src, fx, fy, dst); \\r
+            cudaSafeCall( cudaGetLastError() ); \\r
+            cudaSafeCall( cudaDeviceSynchronize() ); \\r
+        } \\r
+    }; \\r
+    template <> struct ResizeDispatcherNonStream<PointFilter, type> \\r
+    { \\r
+        static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \\r
+        { \\r
+            dim3 block(32, 8); \\r
+            dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \\r
+            TextureBinder texHandler(&tex_resize_ ## type , src); \\r
+            tex_resize_ ## type ##_reader texSrc; \\r
+            resizeNN<<<grid, block>>>(texSrc, fx, fy, dst); \\r
+            cudaSafeCall( cudaGetLastError() ); \\r
+            cudaSafeCall( cudaDeviceSynchronize() ); \\r
+        } \\r
+    };\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)\r
+    \r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float2)\r
+    OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)\r
+    \r
+#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX\r
+\r
+    template <template <typename> class Filter, typename T> struct ResizeDispatcher /* selects the launch path for a given Filter / element type T from the stream handle */\r
+    { \r
+        static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)\r
+        {\r
+            if (stream == 0) /* stream 0: use the dispatcher that launches without a stream argument and calls cudaDeviceSynchronize() afterwards */\r
+                ResizeDispatcherNonStream<Filter, T>::call(src, fx, fy, dst);\r
+            else /* any other stream: launch on that stream; this dispatcher does not synchronize */\r
+                ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);\r
+        }\r
+    };\r
+\r
+    template <typename T> void resize_gpu(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream) /* host entry point: reinterprets src/dst as DevMem2D_<T> and forwards to the dispatcher chosen by interpolation */\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream);\r
+\r
+        static const caller_t callers[3] = /* indexed by interpolation: 0 = PointFilter, 1 = LinearFilter, 2 = CubicFilter */ \r
+        {\r
+            ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call\r
+        };\r
+\r
+        callers[interpolation](static_cast< DevMem2D_<T> >(src), fx, fy, static_cast< DevMem2D_<T> >(dst), stream); /* no range check here: interpolation must already be 0, 1 or 2 */\r
+    }\r
+\r
+    template void resize_gpu<uchar >(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uchar2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uchar3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uchar4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<schar>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<char2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<char3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<char4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<ushort >(const DevMem2D& src,float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<ushort2>(const DevMem2D& src,float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<ushort3>(const DevMem2D& src,float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<ushort4>(const DevMem2D& src,float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<short >(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<short2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<short3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<short4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<uint >(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uint2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uint3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<uint4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<int >(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<int2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<int3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<int4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    \r
+    template void resize_gpu<float >(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<float2>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<float3>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    template void resize_gpu<float4>(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+\r
  /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
  \r
      texture<uchar4, 2> tex_meanshift;\r
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp

index 19ac1a1..b9d3607 100644 (file)
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -272,14 +272,28 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
  ////////////////////////////////////////////////////////////////////////\r
  // resize\r
  \r
+namespace cv { namespace gpu {  namespace imgproc\r
+{\r
+    template <typename T> void resize_gpu(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+}}}\r
+\r
  void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
  {\r
-    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
+    using namespace cv::gpu::imgproc;\r
  \r
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
+    typedef void (*caller_t)(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+    static const caller_t callers[6][4] = \r
+    {\r
+        {resize_gpu<uchar>, resize_gpu<uchar2>, resize_gpu<uchar3>, resize_gpu<uchar4>},\r
+        {resize_gpu<schar>, resize_gpu<char2>, resize_gpu<char3>, resize_gpu<char4>},\r
+        {resize_gpu<ushort>, resize_gpu<ushort2>, resize_gpu<ushort3>, resize_gpu<ushort4>},\r
+        {resize_gpu<short>, resize_gpu<short2>, resize_gpu<short3>, resize_gpu<short4>},\r
+        {resize_gpu<int>, resize_gpu<int2>, resize_gpu<int3>, resize_gpu<int4>},\r
+        {resize_gpu<float>, resize_gpu<float2>, resize_gpu<float3>, resize_gpu<float4>}\r
+    };\r
  \r
-    CV_Assert( src.size().area() > 0 );\r
+    CV_Assert( src.size().area() > 0 && src.depth() <= CV_32F && src.channels() <= 4 ); // keep the non-empty source check: the kernels and NPP cannot handle a 0-sized image\r
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );\r
      CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
  \r
      if( dsize == Size() )\r
@@ -294,34 +308,43 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
  \r
      dst.create(dsize, src.type());\r
  \r
-    NppiSize srcsz;\r
-    srcsz.width  = src.cols;\r
-    srcsz.height = src.rows;\r
-    NppiRect srcrect;\r
-    srcrect.x = srcrect.y = 0;\r
-    srcrect.width  = src.cols;\r
-    srcrect.height = src.rows;\r
-    NppiSize dstsz;\r
-    dstsz.width  = dst.cols;\r
-    dstsz.height = dst.rows;\r
-\r
      cudaStream_t stream = StreamAccessor::getStream(s);\r
  \r
-    NppStreamHandler h(stream);\r
-\r
-    if (src.type() == CV_8UC1)\r
+    if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))\r
      {\r
-        nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};\r
+\r
+        NppiSize srcsz;\r
+        srcsz.width  = src.cols;\r
+        srcsz.height = src.rows;\r
+        NppiRect srcrect;\r
+        srcrect.x = srcrect.y = 0;\r
+        srcrect.width  = src.cols;\r
+        srcrect.height = src.rows;\r
+        NppiSize dstsz;\r
+        dstsz.width  = dst.cols;\r
+        dstsz.height = dst.rows;\r
+\r
+        NppStreamHandler h(stream);\r
+\r
+        if (src.type() == CV_8UC1)\r
+        {\r
+            nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        }\r
+        else\r
+        {\r
+            nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        }\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
      }\r
      else\r
      {\r
-        nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        callers[src.depth()][src.channels() - 1](src, static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);\r
      }\r
-\r
-    if (stream == 0)\r
-        cudaSafeCall( cudaDeviceSynchronize() );\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp

index 094ed9a..f476fbb 100644 (file)
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -137,7 +137,7 @@ struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
  \r
          size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));\r
  \r
-        src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);\r
+        src = cvtest::randomMat(rng, size, type, 0.0, CV_MAT_DEPTH(type) == CV_32F ? 1.0 : 255.0, false);\r
  \r
          cv::resize(src, dst_gold1, cv::Size(), 2.0, 2.0, interpolation);\r
          cv::resize(src, dst_gold2, cv::Size(), 0.5, 0.5, interpolation);\r
@@ -146,7 +146,7 @@ struct Resize : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int
  \r
  TEST_P(Resize, Accuracy)\r
  {\r
-    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR"};\r
+    static const char* interpolations[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};\r
      const char* interpolationStr = interpolations[interpolation];\r
  \r
      PRINT_PARAM(devInfo);\r
@@ -169,14 +169,14 @@ TEST_P(Resize, Accuracy)
          gpuRes2.download(dst2);\r
      );\r
  \r
-    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.5);\r
-    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.5);\r
+    EXPECT_MAT_SIMILAR(dst_gold1, dst1, 0.2);\r
+    EXPECT_MAT_SIMILAR(dst_gold2, dst2, 0.2);\r
  }\r
  \r
  INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(\r
                          testing::ValuesIn(devices()), \r
-                        testing::Values(CV_8UC1, CV_8UC4), \r
-                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR)));\r
+                        testing::Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), \r
+                        testing::Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR, (int)cv::INTER_CUBIC)));\r
  \r
  ///////////////////////////////////////////////////////////////////////////////////////////////////////\r
  // remap\r
diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp

index 5f70236..813f527 100644 (file)
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -592,9 +592,81 @@ TEST(resize)
  \r
      for (int size = 1000; size <= 3000; size += 1000)\r
      {\r
-        SUBTEST << "size " << size;\r
+        SUBTEST << "size " << size << ", 8UC1, up";\r
+\r
+        gen(src, size, size, CV_8U, 0, 256);\r
+        dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 8UC1, down";\r
  \r
          gen(src, size, size, CV_8U, 0, 256);\r
+        dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 8UC3, up";\r
+\r
+        gen(src, size, size, CV_8UC3, 0, 256);\r
+        dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 8UC3, down";\r
+\r
+        gen(src, size, size, CV_8UC3, 0, 256);\r
+        dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 8UC4, up";\r
+\r
+        gen(src, size, size, CV_8UC4, 0, 256);\r
          dst.create(size * 2, size * 2, CV_8U);\r
  \r
          CPU_ON;\r
@@ -608,6 +680,60 @@ TEST(resize)
          gpu::resize(d_src, d_dst, d_dst.size());\r
          GPU_OFF;\r
      }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 8UC4, down";\r
+\r
+        gen(src, size, size, CV_8UC4, 0, 256);\r
+        dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 32FC1, up";\r
+\r
+        gen(src, size, size, CV_32FC1, 0, 256);\r
+        dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size * 2, size * 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
+    for (int size = 1000; size <= 3000; size += 1000)\r
+    {\r
+        SUBTEST << "size " << size << ", 32FC1, down";\r
+\r
+        gen(src, size, size, CV_32FC1, 0, 256);\r
+        dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        CPU_ON;\r
+        resize(src, dst, dst.size());\r
+        CPU_OFF;\r
+\r
+        d_src = src;\r
+        d_dst.create(size / 2, size / 2, CV_8U);\r
+\r
+        GPU_ON;\r
+        gpu::resize(d_src, d_dst, d_dst.size());\r
+        GPU_OFF;\r
+    }\r
  }\r
  \r
  \r
author	Vladislav Vinogradov <no@email>
	Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)
committer	Vladislav Vinogradov <no@email>
	Mon, 12 Sep 2011 10:13:14 +0000 (10:13 +0000)
modules/gpu/include/opencv2/gpu/gpu.hpp		patch \| blob \| history
modules/gpu/perf/perf_imgproc.cpp		patch \| blob \| history
modules/gpu/src/cuda/imgproc.cu		patch \| blob \| history
modules/gpu/src/imgproc.cpp		patch \| blob \| history
modules/gpu/test/test_imgproc.cpp		patch \| blob \| history
samples/gpu/performance/tests.cpp		patch \| blob \| history