From de431609db6444aa39ffde0e82966b4fbd3182e8 Mon Sep 17 00:00:00 2001 From: krodyush Date: Tue, 17 Dec 2013 14:01:01 +0400 Subject: [PATCH] optimize Dx and Dy calcualtion to make it as single opencl kernel --- modules/ocl/src/imgproc.cpp | 156 ++++++++----- modules/ocl/src/opencl/imgproc_sobel3.cl | 389 +++++++++++++++++++++++++------ 2 files changed, 417 insertions(+), 128 deletions(-) diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index c25dddd..3ce7ba6 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -1033,67 +1033,117 @@ namespace cv else scale = 1. / scale; - if (ksize > 0) + const int sobel_lsz = 16; + if((src.type() == CV_8UC1 || src.type() == CV_32FC1) && + (ksize==3 || ksize==5 || ksize==7 || ksize==-1) && + src.wholerows > sobel_lsz + (ksize>>1) && + src.wholecols > sobel_lsz + (ksize>>1)) { - Context* clCxt = Context::getContext(); - if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 && - src.cols % 8 == 0 && src.rows % 8 == 0 && - ksize==3 && - (borderType ==cv::BORDER_REFLECT || - borderType == cv::BORDER_REPLICATE || - borderType ==cv::BORDER_REFLECT101 || - borderType ==cv::BORDER_WRAP)) + Dx.create(src.size(), CV_32FC1); + Dy.create(src.size(), CV_32FC1); + + CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols); + + size_t lt2[3] = {sobel_lsz, sobel_lsz, 1}; + size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1}; + + unsigned int src_pitch = src.step; + unsigned int Dx_pitch = Dx.step; + unsigned int Dy_pitch = Dy.step; + + int src_offset_x = (src.offset % src.step) / src.elemSize(); + int src_offset_y = src.offset / src.step; + + float _scale = scale; + + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch )); + + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y )); + + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset )); + args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch )); + args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset )); + args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch )); + + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows )); + + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols )); + args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows )); + + args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale )); + + string option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]); + switch(src.type()) { - Dx.create(src.size(), CV_32FC1); - Dy.create(src.size(), CV_32FC1); - - const unsigned int block_x = 8; - const unsigned int block_y = 8; - - unsigned int src_pitch = src.step; - unsigned int dst_pitch = Dx.cols; - - float _scale = scale; - - std::vector > args; - args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data )); - args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data )); - args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data )); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols )); - args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows )); - args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch )); - args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch )); - args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale )); - size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1}; - - string option = "-D BLK_X=8 -D BLK_Y=8"; - switch(borderType) - { - case cv::BORDER_REPLICATE: - option += " -D BORDER_REPLICATE"; - break; - case cv::BORDER_REFLECT: - option += " -D BORDER_REFLECT"; - break; - case cv::BORDER_REFLECT101: - option += " -D BORDER_REFLECT101"; - break; - case cv::BORDER_WRAP: - option += " -D BORDER_WRAP"; - break; - } - openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() ); + case CV_8UC1: + option += " -D SRCTYPE=uchar"; + break; + case CV_32FC1: + option += " -D SRCTYPE=float"; + break; } - else + switch(borderType) { - Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType); - Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType); + case cv::BORDER_CONSTANT: + option += " -D BORDER_CONSTANT"; + break; + case cv::BORDER_REPLICATE: + option += " -D BORDER_REPLICATE"; + break; + case cv::BORDER_REFLECT: + option += " -D BORDER_REFLECT"; + break; + case cv::BORDER_REFLECT101: + option += " -D BORDER_REFLECT_101"; + break; + case cv::BORDER_WRAP: + option += " -D BORDER_WRAP"; + break; + default: + CV_Error(CV_StsBadFlag, "BORDER type is not supported!"); + break; } + + string kernel_name; + switch(ksize) + { + case -1: + option += " -D SCHARR"; + kernel_name = "sobel3"; + break; + case 3: + kernel_name = "sobel3"; + break; + case 5: + kernel_name = "sobel5"; + break; + case 7: + kernel_name = "sobel7"; + break; + default: + CV_Error(CV_StsBadFlag, "Kernel size is not supported!"); + break; + } + openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() ); } else { - Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType); - Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType); + if (ksize > 0) + { + Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType); + Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType); + } + else + { + Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType); + Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType); + } } CV_Assert(Dx.offset == 0 && Dy.offset == 0); } diff --git a/modules/ocl/src/opencl/imgproc_sobel3.cl b/modules/ocl/src/opencl/imgproc_sobel3.cl index d6a995f..8356fce 100644 --- a/modules/ocl/src/opencl/imgproc_sobel3.cl +++ b/modules/ocl/src/opencl/imgproc_sobel3.cl @@ -1,45 +1,97 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////Macro for border type//////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////// -#ifdef BORDER_REPLICATE -//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh -#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i)) -#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr)) -#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i)) -#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr)) -#endif -#ifdef BORDER_REFLECT -//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb -#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i)) -#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)) -#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i)) -#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr)) +#ifdef BORDER_CONSTANT +//CCCCCC|abcdefgh|CCCCCCC +#define EXTRAPOLATE(x, maxV) +#elif defined BORDER_REPLICATE +//aaaaaa|abcdefgh|hhhhhhh +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = max(min((x), (maxV) - 1), 0); \ + } +#elif defined BORDER_WRAP +//cdefgh|abcdefgh|abcdefg +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = ( (x) + (maxV) ) % (maxV); \ + } +#elif defined BORDER_REFLECT +//fedcba|abcdefgh|hgfedcb +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \ + } +#elif defined BORDER_REFLECT_101 +//gfedcb|abcdefgh|gfedcba +#define EXTRAPOLATE(x, maxV) \ + { \ + (x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \ + } +#else +#error No extrapolation method #endif -#ifdef BORDER_REFLECT101 -//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba -#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i)) -#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr)) -#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i)) -#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr)) -#endif +#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x]) -#ifdef BORDER_WRAP -//BORDER_WRAP: cdefgh|abcdefgh|abcdefg -#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i)) -#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr)) -#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i)) -#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr)) +#ifdef BORDER_CONSTANT +//CCCCCC|abcdefgh|CCCCCCC +#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y)) +#else +#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y)) #endif +#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x]) +#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x]) + +#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \ + int srcX = x + srcOffsetX - (kernel_border); \ + int srcY = y + srcOffsetY - (kernel_border); \ + int xb = srcX; \ + int yb = srcY; \ + \ + EXTRAPOLATE(xb, (width)); \ + EXTRAPOLATE(yb, (height)); \ + lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + \ + if(lix < ((kernel_border)*2)) \ + { \ + int xb = srcX+BLK_X; \ + EXTRAPOLATE(xb,(width)); \ + lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } \ + if(liy< ((kernel_border)*2)) \ + { \ + int yb = srcY+BLK_Y; \ + EXTRAPOLATE(yb, (height)); \ + lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } \ + if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \ + { \ + int xb = srcX+BLK_X; \ + int yb = srcY+BLK_Y; \ + EXTRAPOLATE(xb,(width)); \ + EXTRAPOLATE(yb,(height)); \ + lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \ + } + __kernel void sobel3( __global uchar* Src, - __global float* DstX, - __global float* DstY, - int width, int height, - uint srcStride, uint dstStride, - float scale + const uint SrcPitch, + const int srcOffsetX, + const int srcOffsetY, + __global uchar* DstX, + const int DstXOffset, + const uint DstXPitch, + __global uchar* DstY, + const int DstYOffset, + const uint DstYPitch, + int width, + int height, + int dstWidth, + int dstHeight, + float scale ) { __local float lsmem[BLK_Y+2][BLK_X+2]; @@ -47,62 +99,249 @@ __kernel void sobel3( int lix = get_local_id(0); int liy = get_local_id(1); - int gix = get_group_id(0); - int giy = get_group_id(1); - - int id_x = get_global_id(0); - int id_y = get_global_id(1); - - lsmem[liy+1][lix+1] = convert_float(Src[ id_y * srcStride + id_x ]); - - int id_y_h = ADDR_H(id_y-1, 0,height); - int id_y_b = ADDR_B(id_y+1, height,id_y+1); - - int id_x_l = ADDR_L(id_x-1, 0,width); - int id_x_r = ADDR_R(id_x+1, width,id_x+1); - - if(liy==0) - { - lsmem[0][lix+1]=convert_float(Src[ id_y_h * srcStride + id_x ]); - - if(lix==0) - lsmem[0][0]=convert_float(Src[ id_y_h * srcStride + id_x_l ]); - else if(lix==BLK_X-1) - lsmem[0][BLK_X+1]=convert_float(Src[ id_y_h * srcStride + id_x_r ]); - } - else if(liy==BLK_Y-1) - { - lsmem[BLK_Y+1][lix+1]=convert_float(Src[ id_y_b * srcStride + id_x ]); - - if(lix==0) - lsmem[BLK_Y+1][0]=convert_float(Src[ id_y_b * srcStride + id_x_l ]); - else if(lix==BLK_X-1) - lsmem[BLK_Y+1][BLK_X+1]=convert_float(Src[ id_y_b * srcStride + id_x_r ]); - } - - if(lix==0) - lsmem[liy+1][0] = convert_float(Src[ id_y * srcStride + id_x_l ]); - else if(lix==BLK_X-1) - lsmem[liy+1][BLK_X+1] = convert_float(Src[ id_y * srcStride + id_x_r ]); + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1) barrier(CLK_LOCAL_MEM_FENCE); + if( x >= dstWidth || y >=dstHeight ) return; + float u1 = lsmem[liy][lix]; float u2 = lsmem[liy][lix+1]; float u3 = lsmem[liy][lix+2]; float m1 = lsmem[liy+1][lix]; - float m2 = lsmem[liy+1][lix+1]; float m3 = lsmem[liy+1][lix+2]; float b1 = lsmem[liy+2][lix]; float b2 = lsmem[liy+2][lix+1]; float b3 = lsmem[liy+2][lix+2]; - //m2 * scale;// - float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 ); - DstX[ id_y * dstStride + id_x ] = dx * scale; + //calc and store dx and dy;// +#ifdef SCHARR + DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale; + DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale; +#else + DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale; + DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale; +#endif +} + +__kernel void sobel5( + __global uchar* Src, + const uint SrcPitch, + const int srcOffsetX, + const int srcOffsetY, + __global uchar* DstX, + const int DstXOffset, + const uint DstXPitch, + __global uchar* DstY, + const int DstYOffset, + const uint DstYPitch, + int width, + int height, + int dstWidth, + int dstHeight, + float scale + ) +{ + __local float lsmem[BLK_Y+4][BLK_X+4]; + + int lix = get_local_id(0); + int liy = get_local_id(1); + + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2) + barrier(CLK_LOCAL_MEM_FENCE); + + if( x >= dstWidth || y >=dstHeight ) return; + + float t1 = lsmem[liy][lix]; + float t2 = lsmem[liy][lix+1]; + float t3 = lsmem[liy][lix+2]; + float t4 = lsmem[liy][lix+3]; + float t5 = lsmem[liy][lix+4]; + + float u1 = lsmem[liy+1][lix]; + float u2 = lsmem[liy+1][lix+1]; + float u3 = lsmem[liy+1][lix+2]; + float u4 = lsmem[liy+1][lix+3]; + float u5 = lsmem[liy+1][lix+4]; + + float m1 = lsmem[liy+2][lix]; + float m2 = lsmem[liy+2][lix+1]; + float m4 = lsmem[liy+2][lix+3]; + float m5 = lsmem[liy+2][lix+4]; + + float l1 = lsmem[liy+3][lix]; + float l2 = lsmem[liy+3][lix+1]; + float l3 = lsmem[liy+3][lix+2]; + float l4 = lsmem[liy+3][lix+3]; + float l5 = lsmem[liy+3][lix+4]; + + float b1 = lsmem[liy+4][lix]; + float b2 = lsmem[liy+4][lix+1]; + float b3 = lsmem[liy+4][lix+2]; + float b4 = lsmem[liy+4][lix+3]; + float b5 = lsmem[liy+4][lix+4]; + + //calc and store dx and dy;// + DSTX(x,y) = scale * + mad(12.0f, m4 - m2, + mad(6.0f, m5 - m1, + mad(8.0f, u4 - u2 + l4 - l2, + mad(4.0f, u5 - u1 + l5 - l1, + mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 ) + ) + ) + ) + ); + + DSTY(x,y) = scale * + mad(12.0f, l3 - u3, + mad(6.0f, b3 - t3, + mad(8.0f, l2 - u2 + l4 - u4, + mad(4.0f, b2 - t2 + b4 - t4, + mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 ) + ) + ) + ) + ); +} + +__kernel void sobel7( + __global uchar* Src, + const uint SrcPitch, + const int srcOffsetX, + const int srcOffsetY, + __global uchar* DstX, + const int DstXOffset, + const uint DstXPitch, + __global uchar* DstY, + const int DstYOffset, + const uint DstYPitch, + int width, + int height, + int dstWidth, + int dstHeight, + float scale + ) +{ + __local float lsmem[BLK_Y+6][BLK_X+6]; + + int lix = get_local_id(0); + int liy = get_local_id(1); + + int x = (int)get_global_id(0); + int y = (int)get_global_id(1); + + INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3) + barrier(CLK_LOCAL_MEM_FENCE); + + if( x >= dstWidth || y >=dstHeight ) return; + + float tt1 = lsmem[liy][lix]; + float tt2 = lsmem[liy][lix+1]; + float tt3 = lsmem[liy][lix+2]; + float tt4 = lsmem[liy][lix+3]; + float tt5 = lsmem[liy][lix+4]; + float tt6 = lsmem[liy][lix+5]; + float tt7 = lsmem[liy][lix+6]; + + float t1 = lsmem[liy+1][lix]; + float t2 = lsmem[liy+1][lix+1]; + float t3 = lsmem[liy+1][lix+2]; + float t4 = lsmem[liy+1][lix+3]; + float t5 = lsmem[liy+1][lix+4]; + float t6 = lsmem[liy+1][lix+5]; + float t7 = lsmem[liy+1][lix+6]; + + float u1 = lsmem[liy+2][lix]; + float u2 = lsmem[liy+2][lix+1]; + float u3 = lsmem[liy+2][lix+2]; + float u4 = lsmem[liy+2][lix+3]; + float u5 = lsmem[liy+2][lix+4]; + float u6 = lsmem[liy+2][lix+5]; + float u7 = lsmem[liy+2][lix+6]; + + float m1 = lsmem[liy+3][lix]; + float m2 = lsmem[liy+3][lix+1]; + float m3 = lsmem[liy+3][lix+2]; + float m5 = lsmem[liy+3][lix+4]; + float m6 = lsmem[liy+3][lix+5]; + float m7 = lsmem[liy+3][lix+6]; + + float l1 = lsmem[liy+4][lix]; + float l2 = lsmem[liy+4][lix+1]; + float l3 = lsmem[liy+4][lix+2]; + float l4 = lsmem[liy+4][lix+3]; + float l5 = lsmem[liy+4][lix+4]; + float l6 = lsmem[liy+4][lix+5]; + float l7 = lsmem[liy+4][lix+6]; + + float b1 = lsmem[liy+5][lix]; + float b2 = lsmem[liy+5][lix+1]; + float b3 = lsmem[liy+5][lix+2]; + float b4 = lsmem[liy+5][lix+3]; + float b5 = lsmem[liy+5][lix+4]; + float b6 = lsmem[liy+5][lix+5]; + float b7 = lsmem[liy+5][lix+6]; + + float bb1 = lsmem[liy+6][lix]; + float bb2 = lsmem[liy+6][lix+1]; + float bb3 = lsmem[liy+6][lix+2]; + float bb4 = lsmem[liy+6][lix+3]; + float bb5 = lsmem[liy+6][lix+4]; + float bb6 = lsmem[liy+6][lix+5]; + float bb7 = lsmem[liy+6][lix+6]; + + //calc and store dx and dy + DSTX(x,y) = scale * + mad(100.0f, m5 - m3, + mad(80.0f, m6 - m2, + mad(20.0f, m7 - m1, + mad(75.0f, u5 - u3 + l5 - l3, + mad(60.0f, u6 - u2 + l6 - l2, + mad(15.0f, u7 - u1 + l7 - l1, + mad(30.0f, t5 - t3 + b5 - b3, + mad(24.0f, t6 - t2 + b6 - b2, + mad(6.0f, t7 - t1 + b7 - b1, + mad(5.0f, tt5 - tt3 + bb5 - bb3, + mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ); - float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3); - DstY[ id_y * dstStride + id_x ] = dy * scale; -} \ No newline at end of file + DSTY(x,y) = scale * + mad(100.0f, l4 - u4, + mad(80.0f, b4 - t4, + mad(20.0f, bb4 - tt4, + mad(75.0f, l5 - u5 + l3 - u3, + mad(60.0f, b5 - t5 + b3 - t3, + mad(15.0f, bb5 - tt5 + bb3 - tt3, + mad(30.0f, l6 - u6 + l2 - u2, + mad(24.0f, b6 - t6 + b2 - t2, + mad(6.0f, bb6 - tt6 + bb2 - tt2, + mad(5.0f, l7 - u7 + l1 - u1, + mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 ) + ) + ) + ) + ) + ) + ) + ) + ) + ) + ); +} -- 2.7.4