//virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
// uchar*& datastart, uchar*& data, size_t* step) = 0;
//virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
- virtual UMatData* allocate(int dims, const int* sizes,
- int type, size_t* step) const = 0;
+ virtual UMatData* allocate(int dims, const int* sizes, int type,
+ void* data, size_t* step, int flags) const = 0;
virtual bool allocate(UMatData* data, int accessflags) const = 0;
virtual void deallocate(UMatData* data) const = 0;
- virtual void map(UMatData* data, int accessflags) const = 0;
- virtual void unmap(UMatData* data) const = 0;
+ virtual void sync(UMatData* u) const;
+ virtual void map(UMatData* data, int accessflags) const;
+ virtual void unmap(UMatData* data) const;
virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
- const size_t dststep[]) const = 0;
+ const size_t dststep[]) const;
virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
const size_t dstofs[], const size_t dststep[],
- const size_t srcstep[]) const = 0;
+ const size_t srcstep[]) const;
virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
- const size_t dstofs[], const size_t dststep[], bool sync) const = 0;
+ const size_t dstofs[], const size_t dststep[], bool sync) const;
};
struct CV_EXPORTS UMatData
{
enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
- DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24 };
+ DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24,
+ USER_ALLOCATED=32 };
UMatData(const MatAllocator* allocator);
+ ~UMatData();
// provide atomic access to the structure
void lock();
inline
-UMat::~UMat()
-{
- release();
- if( step.p != step.buf )
- fastFree(step.p);
-}
-
-inline
UMat& UMat::operator = (const UMat& m)
{
if( this != &m )
namespace cv {
-class StdMatAllocator : public MatAllocator
+void MatAllocator::sync(UMatData*) const
{
-public:
- UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
- {
- size_t total = CV_ELEM_SIZE(type);
- for( int i = dims-1; i >= 0; i-- )
- {
- if( step )
- step[i] = total;
- total *= sizes[i];
- }
- uchar* data = (uchar*)fastMalloc(total);
- UMatData* u = new UMatData(this);
- u->data = u->origdata = data;
- u->size = total;
- u->refcount = 1;
+}
- return u;
- }
+void MatAllocator::map(UMatData*, int) const
+{
+}
- bool allocate(UMatData* u, int accessFlags) const
- {
- if(!u) return false;
- if(u->handle != 0)
- return true;
- return UMat::getStdAllocator()->allocate(u, accessFlags);
- }
+void MatAllocator::unmap(UMatData* u) const
+{
+ if(u->urefcount == 0 && u->refcount == 0)
+ deallocate(u);
+}
- void deallocate(UMatData* u) const
+void MatAllocator::download(UMatData* u, void* dstptr,
+ int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dststep[]) const
+{
+ if(!u)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* srcptr = u->data;
+ for( int i = 0; i < dims; i++ )
{
- if(u)
- {
- fastFree(u->origdata);
- delete u;
- }
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( srcofs )
+ srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+ isz[i] = (int)sz[i];
}
- void map(UMatData*, int) const
- {
- }
+ Mat src(dims, isz, CV_8U, srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
- void unmap(UMatData* u) const
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+}
+
+
+void MatAllocator::upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
+ const size_t dstofs[], const size_t dststep[],
+ const size_t srcstep[]) const
+{
+ if(!u)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* dstptr = u->data;
+ for( int i = 0; i < dims; i++ )
{
- if(u->urefcount == 0)
- deallocate(u);
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( dstofs )
+ dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+ isz[i] = (int)sz[i];
}
- void download(UMatData* u, void* dstptr,
- int dims, const size_t sz[],
+ Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
+
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+}
+
+void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
const size_t srcofs[], const size_t srcstep[],
- const size_t dststep[]) const
+ const size_t dstofs[], const size_t dststep[], bool sync) const
+{
+ if(!usrc || !udst)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* srcptr = usrc->data;
+ uchar* dstptr = udst->data;
+ for( int i = 0; i < dims; i++ )
{
- if(!u)
- return;
- int isz[CV_MAX_DIM];
- uchar* srcptr = u->data;
- for( int i = 0; i < dims; i++ )
- {
- CV_Assert( sz[i] <= (size_t)INT_MAX );
- if( sz[i] == 0 )
- return;
- if( srcofs )
- srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
- isz[i] = (int)sz[i];
- }
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( srcofs )
+ srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+ if( dstofs )
+ dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+ isz[i] = (int)sz[i];
+ }
- Mat src(dims, isz, CV_8U, srcptr, srcstep);
- Mat dst(dims, isz, CV_8U, dstptr, dststep);
+ Mat src(dims, isz, CV_8U, srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
- const Mat* arrays[] = { &src, &dst };
- uchar* ptrs[2];
- NAryMatIterator it(arrays, ptrs, 2);
- size_t j, planesz = it.size;
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
- for( j = 0; j < it.nplanes; j++, ++it )
- memcpy(ptrs[1], ptrs[0], planesz);
- }
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+}
- void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
- const size_t dstofs[], const size_t dststep[],
- const size_t srcstep[]) const
+class StdMatAllocator : public MatAllocator
+{
+public:
+ UMatData* allocate(int dims, const int* sizes, int type,
+ void* data0, size_t* step, int /*flags*/) const
{
- if(!u)
- return;
- int isz[CV_MAX_DIM];
- uchar* dstptr = u->data;
- for( int i = 0; i < dims; i++ )
+ size_t total = CV_ELEM_SIZE(type);
+ for( int i = dims-1; i >= 0; i-- )
{
- CV_Assert( sz[i] <= (size_t)INT_MAX );
- if( sz[i] == 0 )
- return;
- if( dstofs )
- dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
- isz[i] = (int)sz[i];
+ if( step )
+ {
+ if( data0 && step[i] != CV_AUTOSTEP )
+ {
+ CV_Assert(total <= step[i]);
+ total = step[i];
+ }
+ else
+ step[i] = total;
+ }
+ total *= sizes[i];
}
+ uchar* data = data0 ? (uchar*)data0 : (uchar*)fastMalloc(total);
+ UMatData* u = new UMatData(this);
+ u->data = u->origdata = data;
+ u->size = total;
+ u->refcount = data0 == 0;
+ if(data0)
+ u->flags |= UMatData::USER_ALLOCATED;
- Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
- Mat dst(dims, isz, CV_8U, dstptr, dststep);
-
- const Mat* arrays[] = { &src, &dst };
- uchar* ptrs[2];
- NAryMatIterator it(arrays, ptrs, 2);
- size_t j, planesz = it.size;
+ return u;
+ }
- for( j = 0; j < it.nplanes; j++, ++it )
- memcpy(ptrs[1], ptrs[0], planesz);
+ bool allocate(UMatData* u, int /*accessFlags*/) const
+ {
+ if(!u) return false;
+ CV_XADD(&u->urefcount, 1);
+ return true;
}
- void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
- const size_t srcofs[], const size_t srcstep[],
- const size_t dstofs[], const size_t dststep[], bool) const
+ void deallocate(UMatData* u) const
{
- if(!usrc || !udst)
- return;
- int isz[CV_MAX_DIM];
- uchar* srcptr = usrc->data;
- uchar* dstptr = udst->data;
- for( int i = 0; i < dims; i++ )
+ if(u && u->refcount == 0)
{
- CV_Assert( sz[i] <= (size_t)INT_MAX );
- if( sz[i] == 0 )
- return;
- if( srcofs )
- srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
- if( dstofs )
- dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
- isz[i] = (int)sz[i];
+ if( !(u->flags & UMatData::USER_ALLOCATED) )
+ {
+ fastFree(u->origdata);
+ u->origdata = 0;
+ }
+ delete u;
}
-
- Mat src(dims, isz, CV_8U, srcptr, srcstep);
- Mat dst(dims, isz, CV_8U, dstptr, dststep);
-
- const Mat* arrays[] = { &src, &dst };
- uchar* ptrs[2];
- NAryMatIterator it(arrays, ptrs, 2);
- size_t j, planesz = it.size;
-
- for( j = 0; j < it.nplanes; j++, ++it )
- memcpy(ptrs[1], ptrs[0], planesz);
}
};
a = a0;
try
{
- u = a->allocate(dims, size, _type, step.p);
+ u = a->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
catch(...)
{
if(a != a0)
- u = a0->allocate(dims, size, _type, step.p);
+ u = a0->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
return 0;
}
- return funcname ? dlsym(handle, funcname) : 0;
+ return funcname && handle ? dlsym(handle, funcname) : 0;
}
#elif defined WIN32 || defined _WIN32
Queue& Queue::getDefault()
{
Queue& q = TLSData::get()->oclQueue;
- if( !q.p )
+ if( !q.p && haveOpenCL() )
q.create(Context2::getDefault());
return q;
}
}
-bool Kernel::run(int dims, size_t globalsize[], size_t localsize[],
+bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
bool sync, const Queue& q)
{
if(!p || !p->handle || p->e != 0)
return false;
- AutoBuffer<size_t> _globalSize(dims);
- size_t * globalSizePtr = (size_t *)_globalSize;
- for (int i = 0; i < dims; ++i)
- globalSizePtr[i] = localsize == NULL ? globalsize[i] :
- ((globalsize[i] + localsize[i] - 1) / localsize[i]) * localsize[i];
-
cl_command_queue qq = getQueue(q);
- size_t offset[CV_MAX_DIM] = {0};
+ size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1}, localsize[CV_MAX_DIM] = {1,1,1};
+ size_t total = 1;
+ for (int i = 0; i < dims; i++)
+ {
+ size_t val0 = _localsize ? _localsize[i] :
+ dims == 1 ? 64 : dims == 2 ? 16>>i : dims == 3 ? 8>>(i>0) : 1;
+ size_t val = 1;
+        while( val*2 <= val0 )
+            val *= 2;
+ if( _localsize )
+ localsize[i] = val;
+        CV_Assert(_globalsize != 0);
+ total *= _globalsize[i];
+ globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
+ }
+ if( total == 0 )
+ return true;
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
- offset, globalSizePtr, localsize, 0, 0,
+ offset, globalsize, _localsize ? localsize : 0, 0, 0,
sync ? 0 : &p->e);
if( sync || retval < 0 )
{
retval = clBuildProgram(handle, n,
(const cl_device_id*)deviceList,
buildflags.c_str(), 0, 0);
- if( retval == CL_BUILD_PROGRAM_FAILURE )
+ if( retval < 0 )
{
- char buf[1<<16];
size_t retsz = 0;
- clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG,
- sizeof(buf)-16, buf, &retsz);
- errmsg = String(buf);
- CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
+ retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
+ CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
+ if( retval >= 0 && retsz > 0 )
+ {
+ AutoBuffer<char> bufbuf(retsz + 16);
+ char* buf = bufbuf;
+ retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
+ CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
+ if( retval >= 0 )
+ {
+ errmsg = String(buf);
+ CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
+ }
+ }
}
CV_Assert(retval >= 0);
}
class OpenCLAllocator : public MatAllocator
{
public:
- OpenCLAllocator() {}
+ OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
- UMatData* defaultAllocate(int dims, const int* sizes, int type, size_t* step) const
+ UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step, int flags) const
{
- UMatData* u = Mat::getStdAllocator()->allocate(dims, sizes, type, step);
+ UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags);
u->urefcount = 1;
u->refcount = 0;
return u;
}
- void getBestFlags(const Context2& ctx, int& createFlags, int& flags0) const
+ void getBestFlags(const Context2& ctx, int /*flags*/, int& createFlags, int& flags0) const
{
const Device& dev = ctx.device(0);
createFlags = CL_MEM_READ_WRITE;
flags0 = UMatData::COPY_ON_MAP;
}
- UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
+ UMatData* allocate(int dims, const int* sizes, int type,
+ void* data, size_t* step, int flags) const
{
if(!useOpenCL())
- return defaultAllocate(dims, sizes, type, step);
+ return defaultAllocate(dims, sizes, type, data, step, flags);
+ CV_Assert(data == 0);
size_t total = CV_ELEM_SIZE(type);
for( int i = dims-1; i >= 0; i-- )
{
Context2& ctx = Context2::getDefault();
int createFlags = 0, flags0 = 0;
- getBestFlags(ctx, createFlags, flags0);
+ getBestFlags(ctx, flags, createFlags, flags0);
cl_int retval = 0;
void* handle = clCreateBuffer((cl_context)ctx.ptr(),
createFlags, total, 0, &retval);
if( !handle || retval < 0 )
- return defaultAllocate(dims, sizes, type, step);
+ return defaultAllocate(dims, sizes, type, data, step, flags);
UMatData* u = new UMatData(this);
u->data = 0;
u->size = total;
CV_Assert(u->origdata != 0);
Context2& ctx = Context2::getDefault();
int createFlags = 0, flags0 = 0;
- getBestFlags(ctx, createFlags, flags0);
+ getBestFlags(ctx, accessFlags, createFlags, flags0);
cl_context ctx_handle = (cl_context)ctx.ptr();
cl_int retval = 0;
return true;
}
+ void sync(UMatData* u) const
+ {
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+ clFinish(q);
+
+ if( u->hostCopyObsolete() && u->handle &&
+ u->tempCopiedUMat() && u->refcount > 0 && u->origdata)
+ {
+ UMatDataAutoLock lock(u);
+ clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+ u->size, u->origdata, 0, 0, 0);
+ u->markHostCopyObsolete(false);
+ }
+        else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
+        {
+            UMatDataAutoLock lock(u);
+            clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+                                 u->size, u->data, 0, 0, 0);
+            u->markDeviceCopyObsolete(false);
+        }
+ }
+
void deallocate(UMatData* u) const
{
if(!u)
return;
+ UMatDataAutoLock lock(u);
// TODO: !!! when we add Shared Virtual Memory Support,
- // this function (as well as the others should be corrected)
+ // this function (as well as the others) should be corrected
CV_Assert(u->handle != 0 && u->urefcount == 0);
if(u->tempUMat())
{
if( u->hostCopyObsolete() && u->refcount > 0 && u->tempCopiedUMat() )
{
- clEnqueueWriteBuffer((cl_command_queue)Queue::getDefault().ptr(),
+ clEnqueueReadBuffer((cl_command_queue)Queue::getDefault().ptr(),
(cl_mem)u->handle, CL_TRUE, 0,
u->size, u->origdata, 0, 0, 0);
}
clReleaseMemObject((cl_mem)u->handle);
u->handle = 0;
u->currAllocator = u->prevAllocator;
- if(u->data && u->copyOnMap())
+ if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
fastFree(u->data);
u->data = u->origdata;
if(u->refcount == 0)
}
else
{
- if(u->data && u->copyOnMap())
+ if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
+ {
fastFree(u->data);
+ u->data = 0;
+ }
clReleaseMemObject((cl_mem)u->handle);
u->handle = 0;
delete u;
UMatDataAutoLock autolock(u);
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+ cl_int retval = 0;
if( !u->copyOnMap() && u->data )
{
- CV_Assert( clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0) >= 0 );
+ CV_Assert( (retval = clEnqueueUnmapMemObject(q,
+ (cl_mem)u->handle, u->data, 0, 0, 0)) >= 0 );
+ clFinish(q);
u->data = 0;
}
else if( u->copyOnMap() && u->deviceCopyObsolete() )
{
- CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
- u->size, u->data, 0, 0, 0) >= 0 );
+ CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+ u->size, u->data, 0, 0, 0)) >= 0 );
}
u->markDeviceCopyObsolete(false);
u->markHostCopyObsolete(false);
if( sync )
clFinish(q);
}
+
+ MatAllocator* matStdAllocator;
};
MatAllocator* getOpenCLAllocator()
userdata = 0;
}
+UMatData::~UMatData()
+{
+ prevAllocator = currAllocator = 0;
+ urefcount = refcount = 0;
+ data = origdata = 0;
+ size = 0;
+ flags = 0;
+ handle = 0;
+ userdata = 0;
+}
+
void UMatData::lock()
{
umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
MatAllocator* UMat::getStdAllocator()
{
- return ocl::getOpenCLAllocator();
+ if( ocl::haveOpenCL() )
+ return ocl::getOpenCLAllocator();
+ return Mat::getStdAllocator();
}
void swap( UMat& a, UMat& b )
UMat Mat::getUMat(int accessFlags) const
{
UMat hdr;
- if(!u)
+ if(!data)
return hdr;
- UMat::getStdAllocator()->allocate(u, accessFlags);
+ UMatData* temp_u = u;
+ if(!temp_u)
+ {
+ MatAllocator *a = allocator, *a0 = getStdAllocator();
+ if(!a)
+ a = a0;
+ temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags);
+ }
+ UMat::getStdAllocator()->allocate(temp_u, accessFlags);
hdr.flags = flags;
setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr);
- hdr.u = u;
+ hdr.u = temp_u;
hdr.offset = data - datastart;
return hdr;
}
a = a0;
try
{
- u = a->allocate(dims, size, _type, step.p);
+ u = a->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
catch(...)
{
if(a != a0)
- u = a0->allocate(dims, size, _type, step.p);
+ u = a0->allocate(dims, size, _type, 0, step.p, 0);
CV_Assert(u != 0);
}
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
}
}
+
+UMat::~UMat()
+{
+ if( u && u->refcount > 0 )
+ u->currAllocator->sync(u);
+ release();
+ if( step.p != step.buf )
+ fastFree(step.p);
+}
+
void UMat::deallocate()
{
u->currAllocator->deallocate(u);
{
if(!u)
return Mat();
- u->currAllocator->map(u, accessFlags);
+ u->currAllocator->map(u, accessFlags | ACCESS_READ);
CV_Assert(u->data != 0);
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
hdr.u = u;
}
TEST(Core_UMat, base) { CV_UMatTest test; test.safe_run(); }
+
+TEST(Core_UMat, simple)
+{
+ {
+ int a[3] = { 1, 2, 3 };
+ Mat m = Mat(1, 1, CV_32SC3, a);
+ UMat u = m.getUMat(ACCESS_READ);
+ EXPECT_NE((void*)NULL, u.u);
+ }
+
+ {
+ Mat m(10, 10, CV_8UC1), ref;
+ for (int y = 0; y < m.rows; ++y)
+ {
+ uchar * const ptr = m.ptr<uchar>(y);
+ for (int x = 0; x < m.cols; ++x)
+ ptr[x] = x + y * 2;
+ }
+
+ ref = m.clone();
+ Rect r(1, 1, 8, 8);
+ ref(r).setTo(17);
+
+ {
+ UMat um = m(r).getUMat(ACCESS_WRITE);
+ um.setTo(17);
+ }
+
+ double err = norm(m, ref, NORM_INF);
+ if(err > 0)
+ {
+ std::cout << "m: " << m << std::endl;
+ std::cout << "ref: " << ref << std::endl;
+ }
+        EXPECT_EQ(0., err);
+ }
+}
UMat src = _src.getUMat(), dst;
Size sz = src.size(), dstSz = sz;
int scn = src.channels(), depth = src.depth(), bidx;
+ int dims = 2, stripeSize = 32;
size_t globalsize[] = { src.cols, src.rows };
ocl::Kernel k;
bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
dcn = 1;
k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc,
- format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d", depth, scn, bidx));
+ format("-D depth=%d -D scn=%d -D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d",
+ depth, scn, bidx, stripeSize));
+ globalsize[0] = (src.cols + stripeSize-1)/stripeSize;
break;
}
case COLOR_GRAY2BGR:
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
dst = _dst.getUMat();
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst));
- ok = k.run(2, globalsize, 0, false);
+ ok = k.run(dims, globalsize, 0, false);
}
return ok;
}
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
+#ifndef STRIPE_SIZE
+#define STRIPE_SIZE 1
+#endif
+
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
enum
__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols)
{
+#if 0
const int x = get_global_id(0);
const int y = get_global_id(1);
dst[0] = (DATA_TYPE)CV_DESCALE((src[bidx] * B2Y + src[1] * G2Y + src[(bidx^2)] * R2Y), yuv_shift);
#endif
}
+#else
+ const int x0 = get_global_id(0)*STRIPE_SIZE;
+ const int x1 = min(x0 + STRIPE_SIZE, cols);
+ const int y = get_global_id(1);
+
+ if( y < rows )
+ {
+ __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + mad24(y, srcstep, srcoffset)) + x0*scn;
+ __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + mad24(y, dststep, dstoffset));
+ int x;
+ for( x = x0; x < x1; x++, src += scn )
+#ifdef DEPTH_5
+ dst[x] = src[bidx] * 0.114f + src[1] * 0.587f + src[(bidx^2)] * 0.299f;
+#else
+ dst[x] = (DATA_TYPE)(mad24(src[bidx], B2Y, mad24(src[1], G2Y,
+ mad24(src[(bidx^2)], R2Y, 1 << (yuv_shift-1)))) >> yuv_shift);
+#endif
+ }
+#endif
}
__kernel void Gray2RGB(__global const uchar* srcptr, int srcstep, int srcoffset,
#include "test_precomp.hpp"
#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem
+#include "opencv2/core/ocl.hpp"
TEST(TestAPI, openCLExecuteKernelInterop)
{
EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
}
+
+TEST(OCL_TestTAPI, performance)
+{
+ cv::RNG rng;
+#if 1
+ cv::Mat src(1280,768,CV_8UC4), dst;
+ rng.fill(src, RNG::UNIFORM, 0, 255);
+#else
+ cv::Mat src = cv::imread("/Users/vp/work/opencv/samples/c/lena.jpg", 1), dst;
+#endif
+
+ cv::UMat usrc, udst;
+ src.copyTo(usrc);
+
+ cv::ocl::oclMat osrc(src);
+ cv::ocl::oclMat odst;
+
+ int cvtcode = cv::COLOR_BGR2GRAY;
+ int i, niters = 10;
+ double t;
+
+ cv::ocl::cvtColor(osrc, odst, cvtcode);
+ cv::ocl::finish();
+ t = (double)cv::getTickCount();
+ for(i = 0; i < niters; i++)
+ {
+ cv::ocl::cvtColor(osrc, odst, cvtcode);
+ }
+ cv::ocl::finish();
+ t = (double)cv::getTickCount() - t;
+ printf("ocl exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
+
+ cv::cvtColor(usrc, udst, cvtcode);
+ cv::ocl::finish2();
+ t = (double)cv::getTickCount();
+ for(i = 0; i < niters; i++)
+ {
+ cv::cvtColor(usrc, udst, cvtcode);
+ }
+ cv::ocl::finish2();
+ t = (double)cv::getTickCount() - t;
+ printf("t-api exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
+
+ cv::cvtColor(src, dst, cvtcode);
+ t = (double)cv::getTickCount();
+ for(i = 0; i < niters; i++)
+ {
+ cv::cvtColor(src, dst, cvtcode);
+ }
+ t = (double)cv::getTickCount() - t;
+ printf("cpu exec time = %gms per iter\n", t*1000./niters/cv::getTickFrequency());
+ /*cv::imshow("result0", dst);
+ cv::imshow("result1", udst);
+ cv::waitKey();
+ cv::destroyWindow("result0");
+ cv::destroyWindow("result1");*/
+}
+
return u;
}
- UMatData* allocate(int dims0, const int* sizes, int type, size_t* step) const
+ UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags) const
{
+ if( data != 0 )
+ {
+ CV_Error(Error::StsAssert, "The data should normally be NULL!");
+ // probably this is safe to do in such extreme case
+ return stdAllocator->allocate(dims0, sizes, type, data, step, flags);
+ }
PyEnsureGIL gil;
int depth = CV_MAT_DEPTH(type);
{
PyEnsureGIL gil;
PyObject* o = (PyObject*)u->userdata;
- Py_DECREF(o);
+ Py_XDECREF(o);
delete u;
}
}
- void map(UMatData*, int) const
- {
- }
-
- void unmap(UMatData* u) const
- {
- if(u->urefcount == 0)
- deallocate(u);
- }
-
- void download(UMatData* u, void* dstptr,
- int dims, const size_t sz[],
- const size_t srcofs[], const size_t srcstep[],
- const size_t dststep[]) const
- {
- stdAllocator->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
- }
-
- void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
- const size_t dstofs[], const size_t dststep[],
- const size_t srcstep[]) const
- {
- stdAllocator->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
- }
-
- void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
- const size_t srcofs[], const size_t srcstep[],
- const size_t dstofs[], const size_t dststep[], bool sync) const
- {
- stdAllocator->copy(usrc, udst, dims, sz, srcofs, srcstep, dstofs, dststep, sync);
- }
-
const MatAllocator* stdAllocator;
};