From: Alexander Alekhin Date: Thu, 6 Feb 2014 13:27:29 +0000 (+0400) Subject: ocl: workaround for failures with misaligned host buffers X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~631^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e371901dd9c4d7af3d3e372bb8d917bc9202fc58;p=profile%2Fivi%2Fopencv.git ocl: workaround for failures with misaligned host buffers --- diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 0bbfb62..3ae60e1 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -3472,6 +3472,65 @@ public: } }; +#if defined _MSC_VER +#pragma warning(disable:4127) // conditional expression is constant +#endif +template +class AlignedDataPtr +{ +protected: + const size_t size_; + uchar* const originPtr_; + const size_t alignment_; + uchar* ptr_; + uchar* allocatedPtr_; + +public: + AlignedDataPtr(uchar* ptr, size_t size, size_t alignment) + : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL) + { + CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n + if (((size_t)ptr_ & (alignment - 1)) != 0) + { + allocatedPtr_ = new uchar[size_ + alignment - 1]; + ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1)); + if (readAccess) + { + memcpy(ptr_, originPtr_, size_); + } + } + } + + uchar* getAlignedPtr() const + { + CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0); + return ptr_; + } + + ~AlignedDataPtr() + { + if (allocatedPtr_) + { + if (writeAccess) + { + memcpy(originPtr_, ptr_, size_); + } + delete[] allocatedPtr_; + allocatedPtr_ = NULL; + } + ptr_ = NULL; + } +private: + AlignedDataPtr(const AlignedDataPtr&); // disabled + AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled +}; +#if defined _MSC_VER +#pragma warning(default:4127) // conditional expression is constant +#endif + +#ifndef CV_OPENCL_DATA_PTR_ALIGNMENT +#define CV_OPENCL_DATA_PTR_ALIGNMENT 16 +#endif class OpenCLAllocator : public MatAllocator { @@ -3614,8 +3673,9 @@ public: cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); if( u->tempCopiedUMat() ) { + AlignedDataPtr alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, u->origdata, 0, 0, 0) == CL_SUCCESS); + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS); } else { @@ -3697,8 +3757,9 @@ public: if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) { + AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, u->data, 0, 0, 0) == CL_SUCCESS ); + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); u->markHostCopyObsolete(false); } } @@ -3723,8 +3784,9 @@ public: } else if( u->copyOnMap() && u->deviceCopyObsolete() ) { + AlignedDataPtr alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, - u->size, u->data, 0, 0, 0)) == CL_SUCCESS ); + u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS ); } u->markDeviceCopyObsolete(false); u->markHostCopyObsolete(false); @@ -3829,16 +3891,18 @@ public: total, new_sz, srcrawofs, new_srcofs, new_srcstep, dstrawofs, new_dstofs, new_dststep); + + AlignedDataPtr alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); if( iscontinuous ) { CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, - srcrawofs, total, dstptr, 0, 0, 0) == CL_SUCCESS ); + srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); } else { CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1], - new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) == CL_SUCCESS ); + new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); } } @@ -3879,6 +3943,7 @@ public: CV_Assert( u->handle != 0 ); cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); + AlignedDataPtr alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); if( iscontinuous ) { CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,