class CV_EXPORTS Kernel
{
public:
- class CV_EXPORTS Callback
- {
- public:
- virtual ~Callback() {}
- virtual void operator()() = 0;
- };
-
Kernel();
Kernel(const char* kname, const Program& prog);
Kernel(const char* kname, const ProgramSource& prog,
bool create(const char* kname, const ProgramSource& prog,
const String& buildopts, String& errmsg);
- int set(int i, const void* value, size_t sz);
- int set(int i, const UMat& m);
- int set(int i, const KernelArg& arg);
- template<typename _Tp> int set(int i, const _Tp& value)
+ void set(int i, const void* value, size_t sz); // binds sz bytes starting at value to kernel argument index i
+ void set(int i, const UMat& m); // binds a UMat to argument index i
+ void set(int i, const KernelArg& arg); // binds a KernelArg to argument index i
+ template<typename _Tp> void set(int i, const _Tp& value) // generic form: forwards &value and sizeof(value) to the raw-bytes overload
{ return set(i, &value, sizeof(value)); }
- template<typename _Tp1>
- Kernel& args(_Tp1 a1)
+ template<typename _Tp0>
+ Kernel& args(const _Tp0& a0) // one-argument form; the argument is taken by const reference
{
- set(0, a1); return *this;
+ set(0, a0); return *this; // a0 is bound to argument index 0; returns this Kernel
}
- template<typename _Tp1, typename _Tp2>
- Kernel& args(_Tp1 a1, _Tp2 a2)
+ template<typename _Tp0, typename _Tp1>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1) // two-argument form
{
- int i = set(0, a1); set(i, a2); return *this;
+ set(0, a0); set(1, a1); return *this; // each aK is bound to the explicit index K; returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3)
+ template<typename _Tp0, typename _Tp1, typename _Tp2>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2) // three-argument form
{
- int i = set(0, a1); i = set(i, a2); set(i, a3); return *this;
+ set(0, a0); set(1, a1); set(2, a2); return *this; // a0..a2 are bound to indices 0..2; returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3) // four-argument form
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); set(i, a4);
- return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this; // a0..a3 are bound to indices 0..3; returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, // five-argument form
+ const _Tp3& a3, const _Tp4& a4)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); set(i, a5);
- return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this; // a0..a4 are bound to indices 0..4; returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3,
- typename _Tp4, typename _Tp5, typename _Tp6>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6)
+ template<typename _Tp0, typename _Tp1, typename _Tp2,
+ typename _Tp3, typename _Tp4, typename _Tp5>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, // six-argument form
+ const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); set(i, a6); return *this;
+ set(0, a0); set(1, a1); set(2, a2); // a0..a5 are bound to indices 0..5
+ set(3, a3); set(4, a4); set(5, a5); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
- typename _Tp5, typename _Tp6, typename _Tp7>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // seven-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); // a0..a6 are bound to indices 0..6
+ set(4, a4); set(5, a5); set(6, a6); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
- typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // eight-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8);
- return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); // a0..a7 are bound to indices 0..7
+ set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
- typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8, _Tp9 a9)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+ typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // nine-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
- set(i, a9); return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); // a0..a8 are bound to indices 0..8
+ set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
- typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
- _Tp8 a8, _Tp9 a9, _Tp10 a10)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+ typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // ten-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
- i = set(i, a9); set(i, a10); return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5); // a0..a9 are bound to indices 0..9
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
- typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9,
- typename _Tp10, typename _Tp11>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
- _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+ typename _Tp8, typename _Tp9, typename _Tp10>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // eleven-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
- i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5); // a0..a10 are bound to indices 0..10
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this; // returns this Kernel
}
- template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
- typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9,
- typename _Tp10, typename _Tp11, typename _Tp12>
- Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
- _Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11, _Tp12 a12)
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+ typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3, // twelve-argument form
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
{
- int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
- i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
- i = set(i, a9); i = set(i, a10); i = set(i, a11); set(i, a12);
- return *this;
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5); // a0..a11 are bound to indices 0..11
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this; // returns this Kernel
}
- void run(int dims, size_t offset[],
- size_t globalsize[], size_t localsize[], bool sync,
- const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
- const Queue& q=Queue());
- void runTask(bool sync,
- const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
- const Queue& q=Queue());
+ void run(int dims, size_t offset[], size_t globalsize[], // sync == true: the implementation calls clFinish on the queue, then releases the recorded UMat references
+ size_t localsize[], bool sync, const Queue& q=Queue()); // q defaults to a default-constructed Queue
+ void runTask(bool sync, const Queue& q=Queue()); // takes the same sync flag and optional queue as run()
size_t workGroupSize() const;
bool compileWorkGroupSize(size_t wsz[]) const;
namespace cv { namespace ocl {
+struct UMat2D // flat snapshot of a 2-D UMat; Kernel::set passes one by value to clSetKernelArg
+{
+ UMat2D(const UMat& m, int accessFlags) // accessFlags is forwarded to m.handle()
+ {
+ CV_Assert(m.dims == 2); // only exactly two-dimensional matrices are accepted
+ data = (cl_mem)m.handle(accessFlags);
+ offset = m.offset;
+ step = m.step;
+ rows = m.rows;
+ cols = m.cols;
+ }
+ cl_mem data; // result of m.handle(accessFlags), cast to cl_mem
+ size_t offset; // copy of m.offset (presumably a byte offset into data; confirm)
+ size_t step; // copy of m.step (presumably the row stride in bytes; confirm)
+ int rows; // copy of m.rows
+ int cols; // copy of m.cols
+};
+
+struct UMat3D // flat snapshot of a 3-D UMat; Kernel::set passes one by value to clSetKernelArg
+{
+ UMat3D(const UMat& m, int accessFlags) // accessFlags is forwarded to m.handle()
+ {
+ CV_Assert(m.dims == 3); // only exactly three-dimensional matrices are accepted
+ data = (cl_mem)m.handle(accessFlags);
+ offset = m.offset;
+ step = m.step.p[1]; // stride entry for dimension 1 (rows)
+ slicestep = m.step.p[0]; // stride entry for dimension 0 (slices)
+ slices = m.size.p[0];
+ rows = m.size.p[1];
+ cols = m.size.p[2];
+ }
+ cl_mem data; // result of m.handle(accessFlags), cast to cl_mem
+ size_t offset; // copy of m.offset (presumably a byte offset into data; confirm)
+ size_t slicestep; // m.step.p[0]
+ size_t step; // m.step.p[1]
+ int slices; // m.size.p[0]
+ int rows; // m.size.p[1]
+ int cols; // m.size.p[2]
+};
+
// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
{
return data->useOpenCL > 0;
}
+void setUseOpenCL(bool flag) // stores flag as 1 or 0 in the useOpenCL field of the TLSData returned by TLSData::get()
+{
+ if( haveOpenCL() ) // the request is silently ignored when haveOpenCL() is false
+ {
+ TLSData* data = TLSData::get(); // presumably per-thread state, judging by the type name; confirm
+ data->useOpenCL = flag ? 1 : 0;
+ }
+}
+
void finish()
{
Queue::getDefault().finish();
cl_int retval = 0;
handle = ph != 0 ?
clCreateKernel(ph, kname, &retval) : 0;
+ for( int i = 0; i < MAX_ARRS; i++ )
+ u[i] = 0;
+ }
+
+ void cleanupUMats() // drops every UMatData reference recorded by addUMat() and resets the count
+ {
+ for( int i = 0; i < MAX_ARRS; i++ ) // scans all slots rather than only the first nu
+ if( u[i] )
+ {
+ if( CV_XADD(&u[i]->urefcount, -1) == 1 ) // assumes CV_XADD yields the value before the add, so 1 means the last reference was just dropped
+ u[i]->currAllocator->deallocate(u[i]);
+ u[i] = 0; // mark the slot empty
+ }
+ nu = 0;
+ }
+
+ void addUMat(const UMat& m) // records m.u in the next free slot and increments its urefcount
+ {
+ CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0); // requires a free slot and non-null data with a positive urefcount
+ u[nu] = m.u;
+ CV_XADD(&m.u->urefcount, 1); // matched by the decrement in cleanupUMats()
+ nu++;
}
+
void finit()
{
- if(!f.empty()) f->operator()();
+ cleanupUMats(); // release the UMatData references recorded for this kernel's arguments
if(e) { clReleaseEvent(e); e = 0; }
release();
}
cl_kernel handle;
cl_event e;
- Ptr<Kernel::Callback> f;
+ enum { MAX_ARRS = 16 }; // capacity of u[]
+ UMatData* u[MAX_ARRS]; // data blocks recorded by addUMat(); empty slots hold 0
+ int nu; // number of entries currently recorded in u[]
};
}}
return p ? p->handle : 0;
}
-int Kernel::set(int i, const void* value, size_t sz)
+void Kernel::set(int i, const void* value, size_t sz) // binds sz bytes at value to kernel argument i; asserts if p is null or clSetKernelArg returns a negative code
{
CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
- return i+1;
+ if( i == 0 ) // setting argument 0 also releases the UMat references recorded so far
+ p->cleanupUMats();
}
-int Kernel::set(int i, const UMat& m)
+void Kernel::set(int i, const UMat& m) // binds m by delegating to the KernelArg overload
{
- return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
+ set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); // wraps m as a READ_WRITE KernelArg; the cast drops const from m
}
-int Kernel::set(int i, const KernelArg& arg)
+void Kernel::set(int i, const KernelArg& arg) // binds either a matrix (arg.m non-null) or a raw object (arg.obj, arg.sz) to kernel argument i
{
CV_Assert( p && p->handle );
+ if( i == 0 ) // setting argument 0 first releases the UMat references recorded earlier
+ p->cleanupUMats();
if( arg.m )
{
- int dims = arg.m->dims;
- void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
- ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0));
- clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
- clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
- if( dims <= 2 )
+ int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + // translate the KernelArg flag bits into ACCESS_* flags
+ ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
+ if( arg.m->dims <= 2 ) // NOTE(review): UMat2D asserts dims == 2, so a matrix with dims < 2 trips that assertion
{
- clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]);
- clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows);
- clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
- return i + 5;
+ UMat2D u2d(*arg.m, accessFlags);
+ clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d); // the whole struct occupies the single slot i; NOTE(review): the result is not checked, unlike the raw-bytes overload
}
else
{
- clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t)*(dims-1), &arg.m->step.p[0]);
- clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(cl_int)*dims, &arg.m->size.p[0]);
- return i + 4;
+ UMat3D u3d(*arg.m, accessFlags); // NOTE(review): UMat3D asserts dims == 3, so a matrix with dims > 3 trips that assertion
+ clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d); // the whole struct occupies the single slot i; result not checked
}
+ p->addUMat(*arg.m); // keep the matrix data referenced until cleanupUMats() runs
}
else
{
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
- return i+1;
}
}
void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
- bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
+ bool sync, const Queue& q) // sync selects between waiting with clFinish and registering an event callback
{
CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q);
if( sync )
{
clFinish(qq);
- if( !cleanupCallback.empty() )
- cleanupCallback->operator()();
+ p->cleanupUMats(); // clFinish has returned, so the recorded UMat references are released here
}
else
{
- p->f = cleanupCallback;
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}
}
-void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
+void Kernel::runTask(bool sync, const Queue& q)
{
CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q);
if( sync )
{
clFinish(qq);
- if( !cleanupCallback.empty() )
- cleanupCallback->operator()();
+ p->cleanupUMats();
}
else
{
- p->f = cleanupCallback;
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}