Add GPU separable filter support for CV_8UC3, CV_32FC3 and CV_32FC4 types

author Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Thu, 18 Oct 2012 06:59:25 +0000 (10:59 +0400)

committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Fri, 19 Oct 2012 08:41:14 +0000 (12:41 +0400)
author Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Thu, 18 Oct 2012 06:59:25 +0000 (10:59 +0400)
committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Fri, 19 Oct 2012 08:41:14 +0000 (12:41 +0400)
diff --git a/modules/gpu/src/cuda/column_filter.cu b/modules/gpu/src/cuda/column_filter.cu

index d3c3ca6..af7369a 100644 (file)
--- a/modules/gpu/src/cuda/column_filter.cu
+++ b/modules/gpu/src/cuda/column_filter.cu
@@ -377,10 +377,13 @@ namespace cv { namespace gpu { namespace device
          }
  
          template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearColumnFilter_gpu<float3, uchar3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearColumnFilter_gpu<float , int   >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearColumnFilter_gpu<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearColumnFilter_gpu<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
      } // namespace column_filter
  }}} // namespace cv { namespace gpu { namespace device
  
diff --git a/modules/gpu/src/cuda/row_filter.cu b/modules/gpu/src/cuda/row_filter.cu

index 7fdbee6..39fc53f 100644 (file)
--- a/modules/gpu/src/cuda/row_filter.cu
+++ b/modules/gpu/src/cuda/row_filter.cu
@@ -376,10 +376,13 @@ namespace cv { namespace gpu { namespace device
          }
  
          template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearRowFilter_gpu<uchar3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearRowFilter_gpu<int   , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
          template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearRowFilter_gpu<float3, float3>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
+        template void linearRowFilter_gpu<float4, float4>(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
      } // namespace row_filter
  }}} // namespace cv { namespace gpu { namespace device
  
diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp

index c34a97f..77ed46e 100644 (file)
--- a/modules/gpu/src/filtering.cpp
+++ b/modules/gpu/src/filtering.cpp
@@ -922,7 +922,7 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
      int gpuBorderType;
      CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
  
-    CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1);
+    CV_Assert(srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_16SC3 || srcType == CV_32SC1 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4);
  
      CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(srcType) == CV_MAT_CN(bufType));
  
@@ -942,6 +942,9 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
      case CV_8UC1:
          func = linearRowFilter_gpu<uchar, float>;
          break;
+    case CV_8UC3:
+        func = linearRowFilter_gpu<uchar3, float3>;
+        break;
      case CV_8UC4:
          func = linearRowFilter_gpu<uchar4, float4>;
          break;
@@ -954,6 +957,12 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
      case CV_32FC1:
          func = linearRowFilter_gpu<float, float>;
          break;
+    case CV_32FC3:
+        func = linearRowFilter_gpu<float3, float3>;
+        break;
+    case CV_32FC4:
+        func = linearRowFilter_gpu<float4, float4>;
+        break;
      }
  
      return Ptr<BaseRowFilter_GPU>(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, gpuBorderType));
@@ -1034,7 +1043,7 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
      int gpuBorderType;
      CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
  
-    CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1);
+    CV_Assert(dstType == CV_8UC1 || dstType == CV_8UC3 || dstType == CV_8UC4 || dstType == CV_16SC3 || dstType == CV_32SC1 || dstType == CV_32FC1 || dstType == CV_32FC3 || dstType == CV_32FC4);
  
      CV_Assert(CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(dstType) == CV_MAT_CN(bufType));
  
@@ -1054,6 +1063,9 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
      case CV_8UC1:
          func = linearColumnFilter_gpu<float, uchar>;
          break;
+    case CV_8UC3:
+        func = linearColumnFilter_gpu<float3, uchar3>;
+        break;
      case CV_8UC4:
          func = linearColumnFilter_gpu<float4, uchar4>;
          break;
@@ -1066,6 +1078,12 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
      case CV_32FC1:
          func = linearColumnFilter_gpu<float, float>;
          break;
+    case CV_32FC3:
+        func = linearColumnFilter_gpu<float3, float3>;
+        break;
+    case CV_32FC4:
+        func = linearColumnFilter_gpu<float4, float4>;
+        break;
      }
  
      return Ptr<BaseColumnFilter_GPU>(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, gpuBorderType));
diff --git a/modules/gpu/test/test_filters.cpp b/modules/gpu/test/test_filters.cpp

index dddca15..bb07037 100644 (file)
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -152,13 +152,13 @@ TEST_P(Sobel, Accuracy)
      cv::Mat dst_gold;
      cv::Sobel(src, dst_gold, -1, dx, dy, ksize.width, 1.0, 0.0, borderType);
  
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
  }
  
  INSTANTIATE_TEST_CASE_P(GPU_Filter, Sobel, testing::Combine(
      ALL_DEVICES,
      DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
      testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))),
      testing::Values(Deriv_X(0), Deriv_X(1), Deriv_X(2)),
      testing::Values(Deriv_Y(0), Deriv_Y(1), Deriv_Y(2)),
@@ -208,13 +208,13 @@ TEST_P(Scharr, Accuracy)
      cv::Mat dst_gold;
      cv::Scharr(src, dst_gold, -1, dx, dy, 1.0, 0.0, borderType);
  
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1);
  }
  
  INSTANTIATE_TEST_CASE_P(GPU_Filter, Scharr, testing::Combine(
      ALL_DEVICES,
      DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
      testing::Values(Deriv_X(0), Deriv_X(1)),
      testing::Values(Deriv_Y(0), Deriv_Y(1)),
      testing::Values(BorderType(cv::BORDER_REFLECT101),
@@ -281,7 +281,7 @@ TEST_P(GaussianBlur, Accuracy)
  INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(
      ALL_DEVICES,
      DIFFERENT_SIZES,
-    testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
+    testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
      testing::Values(KSize(cv::Size(3, 3)),
                      KSize(cv::Size(5, 5)),
                      KSize(cv::Size(7, 7)),
author	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Thu, 18 Oct 2012 06:59:25 +0000 (10:59 +0400)
committer	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Fri, 19 Oct 2012 08:41:14 +0000 (12:41 +0400)
modules/gpu/src/cuda/column_filter.cu		patch | blob | history
modules/gpu/src/cuda/row_filter.cu		patch | blob | history
modules/gpu/src/filtering.cpp		patch | blob | history
modules/gpu/test/test_filters.cpp		patch | blob | history