From: Vladislav Vinogradov Date: Tue, 1 Oct 2013 11:24:17 +0000 (+0400) Subject: used new device layer for cv::cuda::LUT X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~877^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=23cc31e0416119870acc81f21a8ce165be27bd19;p=profile%2Fivi%2Fopencv.git used new device layer for cv::cuda::LUT --- diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp index 535485f..eb71d6a 100644 --- a/modules/cudaarithm/src/core.cpp +++ b/modules/cudaarithm/src/core.cpp @@ -130,217 +130,4 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); } -//////////////////////////////////////////////////////////////////////// -// LUT - -#if (CUDA_VERSION >= 5000) - -namespace -{ - class LookUpTableImpl : public LookUpTable - { - public: - LookUpTableImpl(InputArray lut); - - void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); - - private: - int lut_cn; - - int nValues3[3]; - const Npp32s* pValues3[3]; - const Npp32s* pLevels3[3]; - - GpuMat d_pLevels; - GpuMat d_nppLut; - GpuMat d_nppLut3[3]; - }; - - LookUpTableImpl::LookUpTableImpl(InputArray _lut) - { - nValues3[0] = nValues3[1] = nValues3[2] = 256; - - Npp32s pLevels[256]; - for (int i = 0; i < 256; ++i) - pLevels[i] = i; - - d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); - pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); - - GpuMat lut; - if (_lut.kind() == _InputArray::GPU_MAT) - { - lut = _lut.getGpuMat(); - } - else - { - Mat hLut = _lut.getMat(); - CV_Assert( hLut.total() == 256 && hLut.isContinuous() ); - lut.upload(Mat(1, 256, hLut.type(), hLut.data)); - } - - lut_cn = lut.channels(); - - CV_Assert( lut.depth() == CV_8U ); - CV_Assert( lut.rows == 1 && lut.cols == 256 ); - - lut.convertTo(d_nppLut, CV_32S); - - if (lut_cn == 1) - { - pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr(); - } - else - { - cuda::split(d_nppLut, d_nppLut3); - - pValues3[0] = d_nppLut3[0].ptr(); - pValues3[1] = d_nppLut3[1].ptr(); - pValues3[2] = d_nppLut3[2].ptr(); - } - } - - void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& _stream) - { - GpuMat src = _src.getGpuMat(); - - const int cn = src.channels(); - - CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); - CV_Assert( lut_cn == 1 || lut_cn == cn ); - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - - NppStreamHandler h(stream); - - NppiSize sz; - sz.height = src.rows; - sz.width = src.cols; - - if (src.type() == CV_8UC1) - { - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), d_pLevels.ptr(), 256) ); - } - else - { - nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); - } - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -#else // (CUDA_VERSION >= 5000) - -namespace -{ - class LookUpTableImpl : public LookUpTable - { - public: - LookUpTableImpl(InputArray lut); - - void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); - - private: - int lut_cn; - - Npp32s pLevels[256]; - int nValues3[3]; - const Npp32s* pValues3[3]; - const Npp32s* pLevels3[3]; - - Mat nppLut; - Mat nppLut3[3]; - }; - - LookUpTableImpl::LookUpTableImpl(InputArray _lut) - { - nValues3[0] = nValues3[1] = nValues3[2] = 256; - - for (int i = 0; i < 256; ++i) - pLevels[i] = i; - pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; - - Mat lut; - if (_lut.kind() == _InputArray::GPU_MAT) - { - lut = Mat(_lut.getGpuMat()); - } - else - { - Mat hLut = _lut.getMat(); - CV_Assert( hLut.total() == 256 && hLut.isContinuous() ); - lut = hLut; - } - - lut_cn = lut.channels(); - - CV_Assert( lut.depth() == CV_8U ); - CV_Assert( lut.rows == 1 && lut.cols == 256 ); - - lut.convertTo(nppLut, CV_32S); - - if (lut_cn == 1) - { - pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr(); - } - else - { - cv::split(nppLut, nppLut3); - - pValues3[0] = nppLut3[0].ptr(); - pValues3[1] = nppLut3[1].ptr(); - pValues3[2] = nppLut3[2].ptr(); - } - } - - void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& _stream) - { - GpuMat src = _src.getGpuMat(); - - const int cn = src.channels(); - - CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); - CV_Assert( lut_cn == 1 || lut_cn == cn ); - - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); - - cudaStream_t stream = StreamAccessor::getStream(_stream); - - NppStreamHandler h(stream); - - NppiSize sz; - sz.height = src.rows; - sz.width = src.cols; - - if (src.type() == CV_8UC1) - { - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), pLevels, 256) ); - } - else - { - nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); - } - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -#endif // (CUDA_VERSION >= 5000) - -Ptr cv::cuda::createLookUpTable(InputArray lut) -{ - return makePtr(lut); -} - #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/cuda/lut.cu b/modules/cudaarithm/src/cuda/lut.cu new file mode 100644 index 0000000..a8d5bc5 --- /dev/null +++ b/modules/cudaarithm/src/cuda/lut.cu @@ -0,0 +1,207 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" + +using namespace cv; +using namespace cv::cudev; + +namespace +{ + texture texLutTable; + + class LookUpTableImpl : public LookUpTable + { + public: + LookUpTableImpl(InputArray lut); + ~LookUpTableImpl(); + + void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + GpuMat d_lut; + cudaTextureObject_t texLutTableObj; + bool cc30; + }; + + LookUpTableImpl::LookUpTableImpl(InputArray _lut) + { + if (_lut.kind() == _InputArray::GPU_MAT) + { + d_lut = _lut.getGpuMat(); + } + else + { + Mat h_lut = _lut.getMat(); + d_lut.upload(Mat(1, 256, h_lut.type(), h_lut.data)); + } + + CV_Assert( d_lut.depth() == CV_8U ); + CV_Assert( d_lut.rows == 1 && d_lut.cols == 256 ); + + cc30 = deviceSupports(FEATURE_SET_COMPUTE_30); + + if (cc30) + { + // Use the texture object + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypeLinear; + texRes.res.linear.devPtr = d_lut.data; + texRes.res.linear.desc = cudaCreateChannelDesc(); + texRes.res.linear.sizeInBytes = 256 * d_lut.channels() * sizeof(uchar); + + cudaTextureDesc texDescr; + std::memset(&texDescr, 0, sizeof(texDescr)); + + CV_CUDEV_SAFE_CALL( cudaCreateTextureObject(&texLutTableObj, &texRes, &texDescr, 0) ); + } + else + { + // Use the texture reference + cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + CV_CUDEV_SAFE_CALL( cudaBindTexture(0, &texLutTable, d_lut.data, &desc) ); + } + } + + LookUpTableImpl::~LookUpTableImpl() + { + if (cc30) + { + // Use the texture object + cudaDestroyTextureObject(texLutTableObj); + } + else + { + // Use the texture reference + cudaUnbindTexture(texLutTable); + } + } + + struct LutTablePtrC1 + { + typedef uchar value_type; + typedef uchar index_type; + + cudaTextureObject_t texLutTableObj; + + __device__ __forceinline__ uchar operator ()(uchar, uchar x) const + { + #if CV_CUDEV_ARCH < 300 + // Use the texture reference + return tex1Dfetch(texLutTable, x); + #else + // Use the texture object + return tex1Dfetch(texLutTableObj, x); + #endif + } + }; + struct LutTablePtrC3 + { + typedef uchar3 value_type; + typedef uchar3 index_type; + + cudaTextureObject_t texLutTableObj; + + __device__ __forceinline__ uchar3 operator ()(const uchar3&, const uchar3& x) const + { + #if CV_CUDEV_ARCH < 300 + // Use the texture reference + return make_uchar3(tex1Dfetch(texLutTable, x.x * 3), tex1Dfetch(texLutTable, x.y * 3 + 1), tex1Dfetch(texLutTable, x.z * 3 + 2)); + #else + // Use the texture object + return make_uchar3(tex1Dfetch(texLutTableObj, x.x * 3), tex1Dfetch(texLutTableObj, x.y * 3 + 1), tex1Dfetch(texLutTableObj, x.z * 3 + 2)); + #endif + } + }; + + void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream) + { + GpuMat src = _src.getGpuMat(); + + const int cn = src.channels(); + const int lut_cn = d_lut.channels(); + + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); + CV_Assert( lut_cn == 1 || lut_cn == cn ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + if (lut_cn == 1) + { + GpuMat_ src1(src.reshape(1)); + GpuMat_ dst1(dst.reshape(1)); + + LutTablePtrC1 tbl; + tbl.texLutTableObj = texLutTableObj; + + dst1.assign(lut_(src1, tbl), stream); + } + else if (lut_cn == 3) + { + GpuMat_& src3 = (GpuMat_&) src; + GpuMat_& dst3 = (GpuMat_&) dst; + + LutTablePtrC3 tbl; + tbl.texLutTableObj = texLutTableObj; + + dst3.assign(lut_(src3, tbl), stream); + } + } +} + +Ptr cv::cuda::createLookUpTable(InputArray lut) +{ + return makePtr(lut); +} + +#endif diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp index e47719c..accf545 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/lut.hpp @@ -47,6 +47,7 @@ #define __OPENCV_CUDEV_PTR2D_LUT_HPP__ #include "../common.hpp" +#include "../util/vec_traits.hpp" #include "../grid/copy.hpp" #include "traits.hpp" #include "gpumat.hpp" @@ -63,7 +64,8 @@ template struct LutPtr __device__ __forceinline__ typename PtrTraits::value_type operator ()(typename PtrTraits::index_type y, typename PtrTraits::index_type x) const { - return tbl(0, src(y, x)); + typedef typename PtrTraits::index_type tbl_index_type; + return tbl(VecTraits::all(0), src(y, x)); } }; @@ -81,8 +83,6 @@ template struct LutPtrSz : LutPtr __host__ LutPtrSz::ptr_type, typename PtrTraits::ptr_type> lutPtr(const SrcPtr& src, const TablePtr& tbl) { - CV_Assert( getRows(tbl) == 1 ); - LutPtrSz::ptr_type, typename PtrTraits::ptr_type> ptr; ptr.src = shrinkPtr(src); ptr.tbl = shrinkPtr(tbl);