//
//M*/
#include "precomp.hpp"
+#include "opencl_kernels.hpp"
namespace cv
{
nu30 = mu30*s3; nu21 = mu21*s3; nu12 = mu12*s3; nu03 = mu03*s3;
}
+static const int OCL_TILE_SIZE = 32;
+
+static bool ocl_moments( InputArray _src, Moments& m, bool binary )
+{
+ printf("!!!!!!!!!!!!!!!!!! ocl moments !!!!!!!!!!!!!!!!!!!\n");
+ const int K = 10;
+ ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, binary ? "-D BINARY_MOMENTS" : "");
+ if( k.empty() )
+ return false;
+
+ UMat src = _src.getUMat();
+ Size sz = src.size();
+ int xtiles = (sz.width + OCL_TILE_SIZE-1)/OCL_TILE_SIZE;
+ int ytiles = (sz.height + OCL_TILE_SIZE-1)/OCL_TILE_SIZE;
+ int ntiles = xtiles*ytiles;
+ UMat umbuf(1, ntiles*K, CV_32S);
+ umbuf.setTo(Scalar::all(0));
+
+ size_t globalsize[] = {xtiles, ytiles};
+ size_t localsize[] = {1, 1};
+ bool ok = k.args(ocl::KernelArg::ReadOnly(src),
+ ocl::KernelArg::PtrWriteOnly(umbuf),
+ OCL_TILE_SIZE, xtiles, ytiles).run(2, globalsize, localsize, false);
+ if(!ok)
+ return false;
+ Mat mbuf;
+ umbuf.copyTo(mbuf);
+ for( int i = 0; i < ntiles; i++ )
+ {
+ double x = (i % xtiles)*OCL_TILE_SIZE, y = (i / xtiles)*OCL_TILE_SIZE;
+ const int* mom = mbuf.ptr<int>() + i*K;
+ double xm = x * mom[0], ym = y * mom[0];
+
+ // accumulate moments computed in each tile
+
+ // + m00 ( = m00' )
+ m.m00 += mom[0];
+
+ // + m10 ( = m10' + x*m00' )
+ m.m10 += mom[1] + xm;
+
+ // + m01 ( = m01' + y*m00' )
+ m.m01 += mom[2] + ym;
+
+ // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+ m.m20 += mom[3] + x * (mom[1] * 2 + xm);
+
+ // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+ m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
+
+ // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+ m.m02 += mom[5] + y * (mom[2] * 2 + ym);
+
+ // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+ m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+
+ // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+ m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+
+ // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+ m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+ // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+ m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+ }
+
+ return true;
+}
+
}
cv::Moments cv::moments( InputArray _src, bool binary )
{
const int TILE_SIZE = 32;
- Mat mat = _src.getMat();
MomentsInTileFunc func = 0;
uchar nzbuf[TILE_SIZE*TILE_SIZE];
Moments m;
- int type = mat.type();
+ int type = _src.type();
int depth = CV_MAT_DEPTH( type );
int cn = CV_MAT_CN( type );
-
- if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
- return contourMoments(mat);
-
- Size size = mat.size();
+ Size size = _src.size();
if( cn > 1 )
- CV_Error( CV_StsBadArg, "Invalid image type" );
-
+ CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" );
+
if( size.width <= 0 || size.height <= 0 )
return m;
-
- if( binary || depth == CV_8U )
- func = momentsInTile<uchar, int, int>;
- else if( depth == CV_16U )
- func = momentsInTile<ushort, int, int64>;
- else if( depth == CV_16S )
- func = momentsInTile<short, int, int64>;
- else if( depth == CV_32F )
- func = momentsInTile<float, double, double>;
- else if( depth == CV_64F )
- func = momentsInTile<double, double, double>;
+
+ if( ocl::useOpenCL() && depth == CV_8U &&
+ size.width >= OCL_TILE_SIZE &&
+ size.height >= OCL_TILE_SIZE &&
+ /*_src.isUMat() &&*/ ocl_moments(_src, m, binary) )
+ ;
else
- CV_Error( CV_StsUnsupportedFormat, "" );
-
- Mat src0(mat);
-
- for( int y = 0; y < size.height; y += TILE_SIZE )
{
- Size tileSize;
- tileSize.height = std::min(TILE_SIZE, size.height - y);
+ Mat mat = _src.getMat();
+ if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
+ return contourMoments(mat);
+
+ if( binary || depth == CV_8U )
+ func = momentsInTile<uchar, int, int>;
+ else if( depth == CV_16U )
+ func = momentsInTile<ushort, int, int64>;
+ else if( depth == CV_16S )
+ func = momentsInTile<short, int, int64>;
+ else if( depth == CV_32F )
+ func = momentsInTile<float, double, double>;
+ else if( depth == CV_64F )
+ func = momentsInTile<double, double, double>;
+ else
+ CV_Error( CV_StsUnsupportedFormat, "" );
- for( int x = 0; x < size.width; x += TILE_SIZE )
+ Mat src0(mat);
+
+ for( int y = 0; y < size.height; y += TILE_SIZE )
{
- tileSize.width = std::min(TILE_SIZE, size.width - x);
- Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
+ Size tileSize;
+ tileSize.height = std::min(TILE_SIZE, size.height - y);
- if( binary )
+ for( int x = 0; x < size.width; x += TILE_SIZE )
{
- cv::Mat tmp(tileSize, CV_8U, nzbuf);
- cv::compare( src, 0, tmp, CV_CMP_NE );
- src = tmp;
- }
+ tileSize.width = std::min(TILE_SIZE, size.width - x);
+ Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
- double mom[10];
- func( src, mom );
+ if( binary )
+ {
+ cv::Mat tmp(tileSize, CV_8U, nzbuf);
+ cv::compare( src, 0, tmp, CV_CMP_NE );
+ src = tmp;
+ }
- if(binary)
- {
- double s = 1./255;
- for( int k = 0; k < 10; k++ )
- mom[k] *= s;
- }
+ double mom[10];
+ func( src, mom );
- double xm = x * mom[0], ym = y * mom[0];
+ if(binary)
+ {
+ double s = 1./255;
+ for( int k = 0; k < 10; k++ )
+ mom[k] *= s;
+ }
- // accumulate moments computed in each tile
+ double xm = x * mom[0], ym = y * mom[0];
- // + m00 ( = m00' )
- m.m00 += mom[0];
+ // accumulate moments computed in each tile
- // + m10 ( = m10' + x*m00' )
- m.m10 += mom[1] + xm;
+ // + m00 ( = m00' )
+ m.m00 += mom[0];
- // + m01 ( = m01' + y*m00' )
- m.m01 += mom[2] + ym;
+ // + m10 ( = m10' + x*m00' )
+ m.m10 += mom[1] + xm;
- // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
- m.m20 += mom[3] + x * (mom[1] * 2 + xm);
+ // + m01 ( = m01' + y*m00' )
+ m.m01 += mom[2] + ym;
- // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
- m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
+ // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
+ m.m20 += mom[3] + x * (mom[1] * 2 + xm);
- // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
- m.m02 += mom[5] + y * (mom[2] * 2 + ym);
+ // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
+ m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
- // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
- m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
+ // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
+ m.m02 += mom[5] + y * (mom[2] * 2 + ym);
- // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
- m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
+ // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
+ m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
- // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
- m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+ // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
+ m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
- // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
- m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+ // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
+ m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
+
+ // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
+ m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
+ }
}
}
--- /dev/null
+/* See LICENSE file in the root OpenCV directory */
+
+#ifdef BINARY_MOMENTS
+#define READ_PIX(ref) (ref != 0)
+#else
+#define READ_PIX(ref) ref
+#endif
+
+__kernel void moments(__global const uchar* src, int src_step, int src_offset,
+ int src_rows, int src_cols, __global int* mom0,
+ int tile_size, int xtiles, int ytiles)
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+ int x_min = x*tile_size;
+ int y_min = y*tile_size;
+
+ if( x_min < src_cols && y_min < src_rows )
+ {
+ int x_max = src_cols - x_min;
+ int y_max = src_rows - y_min;
+ int m[10]={0,0,0,0,0,0,0,0,0,0};
+ __global const uchar* ptr = (src + src_offset);// + y_min*src_step + x_min;
+ __global int* mom = mom0 + (xtiles*y + x)*10;
+
+ x_max = x_max < tile_size ? x_max : tile_size;
+ y_max = y_max < tile_size ? y_max : tile_size;
+
+ for( y = 0; y < y_max; y++ )
+ {
+ int x00, x10, x20, x30;
+ int sx, sy, p;
+ x00 = x10 = x20 = x30 = 0;
+ sy = y*y;
+
+ for( x = 0; x < x_max; x++ )
+ {
+ p = ptr[0];//READ_PIX(ptr[x]);
+ sx = x*x;
+ x00 += p;
+ x10 += x*p;
+ x20 += sx*p;
+ x30 += x*sx*p;
+ }
+
+ m[0] += x00;
+ m[1] += x10;
+ m[2] += y*x00;
+ m[3] += x20;
+ m[4] += y*x10;
+ m[5] += sy*x00;
+ m[6] += x30;
+ m[7] += y*x20;
+ m[8] += sy*x10;
+ m[9] += y*sy*x00;
+ //ptr += src_step;
+ }
+
+ mom[0] = m[0];
+
+ mom[1] = m[1];
+ mom[2] = m[2];
+
+ mom[3] = m[3];
+ mom[4] = m[4];
+ mom[5] = m[5];
+
+ mom[6] = m[6];
+ mom[7] = m[7];
+ mom[8] = m[8];
+ mom[9] = m[9];
+ }
+}
+
+/*__kernel void moments(__global const uchar* src, int src_step, int src_offset,
+ int src_rows, int src_cols, __global float* mom0,
+ int tile_size, int xtiles, int ytiles)
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+ if( x < xtiles && y < ytiles )
+ {
+ //int x_min = x*tile_size;
+ //int y_min = y*tile_size;
+ //int x_max = src_cols - x_min;
+ //int y_max = src_rows - y_min;
+ __global const uchar* ptr = src + src_offset;// + src_step*y_min + x_min;
+ __global float* mom = mom0;// + (y*xtiles + x)*16;
+ //int x00, x10, x20, x30, m00=0;
+ //x_max = min(x_max, tile_size);
+ //y_max = min(y_max, tile_size);
+ //int m00 = 0;
+
+ //for( y = 0; y < y_max; y++, ptr += src_step )
+ //{
+ //int x00 = 0, x10 = 0, x20 = 0, x30 = 0;
+ //for( x = 0; x < x_max; x++ )
+ //{
+ int p = ptr[x];
+ //m00 = p;
+ //x10 += x*p;
+ /*x20 += x*x*p;
+ x30 += x*x*x*p;
+ //}
+ //m00 = m00 + x00;
+ //}
+ mom[0] = p;
+ }
+}*/
+