From 2582464e51594c06e41287831cc9374f72469abb Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 28 May 2012 12:09:40 +0000 Subject: [PATCH] fixed several problems with CUDA 5.0 * gpu::LUT, uses device memory instead of host memory * gpu::multiply, round mod for CV_8U depth --- modules/gpu/src/arithm.cpp | 39 ++++++++++++++- modules/gpu/src/element_operations.cpp | 4 ++ modules/gpu/test/test_core.cpp | 90 +++++++++++++++++----------------- modules/gpu/test/test_gpumat.cpp | 2 +- 4 files changed, 88 insertions(+), 47 deletions(-) diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 4eb9587..2a0dfee 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -320,12 +320,23 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) const Npp32s* pLevels3[3]; int nValues3[3]; +#if (CUDA_VERSION > 4020) + GpuMat d_pLevels; +#endif + LevelsInit() { nValues3[0] = nValues3[1] = nValues3[2] = 256; for (int i = 0; i < 256; ++i) pLevels[i] = i; + + +#if (CUDA_VERSION <= 4020) pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; +#else + d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); + pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); +#endif } }; static LevelsInit lvls; @@ -350,22 +361,48 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) if (src.type() == CV_8UC1) { +#if (CUDA_VERSION <= 4020) nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), lvls.pLevels, 256) ); +#else + GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); + nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), lvls.d_pLevels.ptr(), 256) ); +#endif } else { - Mat nppLut3[3]; const Npp32s* pValues3[3]; + + Mat nppLut3[3]; if (nppLut.channels() == 1) + { +#if (CUDA_VERSION <= 4020) pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr(); +#else + GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); + pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr(); +#endif + } else { cv::split(nppLut, nppLut3); + +#if (CUDA_VERSION <= 4020) pValues3[0] = nppLut3[0].ptr(); pValues3[1] = nppLut3[1].ptr(); pValues3[2] = nppLut3[2].ptr(); +#else + GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data)); + GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data)); + GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data)); + + pValues3[0] = d_nppLut0.ptr(); + pValues3[1] = d_nppLut1.ptr(); + pValues3[2] = d_nppLut2.ptr(); +#endif } + nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) ); } diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index c32f94c..25e0dfc 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -658,7 +658,11 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); +#if (CUDA_VERSION <= 4020) if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F) +#else + if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F && src1.depth() > CV_8U) +#endif { npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream); return; diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index 04039cd..c85a9bf 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -1189,18 +1189,18 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine( //////////////////////////////////////////////////////////////////////////////// // Abs -PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; - int type; + int depth; bool useRoi; virtual void SetUp() { devInfo = GET_PARAM(0); size = GET_PARAM(1); - type = GET_PARAM(2); + depth = GET_PARAM(2); useRoi = GET_PARAM(3); cv::gpu::setDevice(devInfo.deviceID()); @@ -1209,9 +1209,9 @@ PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) TEST_P(Abs, Accuracy) { - cv::Mat src = randomMat(size, type); + cv::Mat src = randomMat(size, depth); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); cv::gpu::abs(loadMat(src, useRoi), dst); cv::Mat dst_gold = cv::abs(src); @@ -1222,24 +1222,24 @@ TEST_P(Abs, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_16SC1), MatType(CV_32FC1)), + testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)), WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// // Sqr -PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; - int type; + int depth; bool useRoi; virtual void SetUp() { devInfo = GET_PARAM(0); size = GET_PARAM(1); - type = GET_PARAM(2); + depth = GET_PARAM(2); useRoi = GET_PARAM(3); cv::gpu::setDevice(devInfo.deviceID()); @@ -1248,9 +1248,9 @@ PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) TEST_P(Sqr, Accuracy) { - cv::Mat src = randomMat(size, type); + cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); cv::gpu::sqr(loadMat(src, useRoi), dst); cv::Mat dst_gold; @@ -1262,10 +1262,10 @@ TEST_P(Sqr, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), - MatType(CV_16UC1), - MatType(CV_16SC1), - MatType(CV_32FC1)), + testing::Values(MatDepth(CV_8U), + MatDepth(CV_16U), + MatDepth(CV_16S), + MatDepth(CV_32F)), WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1295,18 +1295,18 @@ void sqrtGold(const cv::Mat& src, cv::Mat& dst) funcs[src.depth()](src, dst); } -PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; - int type; + int depth; bool useRoi; virtual void SetUp() { devInfo = GET_PARAM(0); size = GET_PARAM(1); - type = GET_PARAM(2); + depth = GET_PARAM(2); useRoi = GET_PARAM(3); cv::gpu::setDevice(devInfo.deviceID()); @@ -1315,24 +1315,24 @@ PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) TEST_P(Sqrt, Accuracy) { - cv::Mat src = randomMat(size, type); + cv::Mat src = randomMat(size, depth); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); cv::gpu::sqrt(loadMat(src, useRoi), dst); cv::Mat dst_gold; sqrtGold(src, dst_gold); - EXPECT_MAT_NEAR(dst_gold, dst, 1e-5); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), - MatType(CV_16UC1), - MatType(CV_16SC1), - MatType(CV_32FC1)), + testing::Values(MatDepth(CV_8U), + MatDepth(CV_16U), + MatDepth(CV_16S), + MatDepth(CV_32F)), WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1362,18 +1362,18 @@ void logGold(const cv::Mat& src, cv::Mat& dst) funcs[src.depth()](src, dst); } -PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; - int type; + int depth; bool useRoi; virtual void SetUp() { devInfo = GET_PARAM(0); size = GET_PARAM(1); - type = GET_PARAM(2); + depth = GET_PARAM(2); useRoi = GET_PARAM(3); cv::gpu::setDevice(devInfo.deviceID()); @@ -1382,24 +1382,24 @@ PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) TEST_P(Log, Accuracy) { - cv::Mat src = randomMat(size, type, 1.0, 255.0); + cv::Mat src = randomMat(size, depth, 1.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); cv::gpu::log(loadMat(src, useRoi), dst); cv::Mat dst_gold; logGold(src, dst_gold); - EXPECT_MAT_NEAR(dst_gold, dst, 1e-6); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6); } INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), - MatType(CV_16UC1), - MatType(CV_16SC1), - MatType(CV_32FC1)), + testing::Values(MatDepth(CV_8U), + MatDepth(CV_16U), + MatDepth(CV_16S), + MatDepth(CV_32F)), WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// @@ -1439,18 +1439,18 @@ void expGold(const cv::Mat& src, cv::Mat& dst) funcs[src.depth()](src, dst); } -PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; - int type; + int depth; bool useRoi; virtual void SetUp() { devInfo = GET_PARAM(0); size = GET_PARAM(1); - type = GET_PARAM(2); + depth = GET_PARAM(2); useRoi = GET_PARAM(3); cv::gpu::setDevice(devInfo.deviceID()); @@ -1459,24 +1459,24 @@ PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) TEST_P(Exp, Accuracy) { - cv::Mat src = randomMat(size, type, 0.0, 10.0); + cv::Mat src = randomMat(size, depth, 0.0, 10.0); - cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); cv::gpu::exp(loadMat(src, useRoi), dst); cv::Mat dst_gold; expGold(src, dst_gold); - EXPECT_MAT_NEAR(dst_gold, dst, 1e-2); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2); } INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), - MatType(CV_16UC1), - MatType(CV_16SC1), - MatType(CV_32FC1)), + testing::Values(MatDepth(CV_8U), + MatDepth(CV_16U), + MatDepth(CV_16S), + MatDepth(CV_32F)), WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/test/test_gpumat.cpp b/modules/gpu/test/test_gpumat.cpp index 8457b71..4dd419a 100644 --- a/modules/gpu/test/test_gpumat.cpp +++ b/modules/gpu/test/test_gpumat.cpp @@ -311,7 +311,7 @@ TEST_P(ConvertTo, WithScaling) cv::Mat dst_gold; src.convertTo(dst_gold, depth2, a, b); - EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 0.0 : 1e-4); + EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 1.0 : 1e-4); } } -- 2.7.4