endif()
endif()
-ocv_warnings_disable(CMAKE_C_FLAGS -Wattributes -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations)
+ocv_warnings_disable(CMAKE_C_FLAGS -Wshorten-64-to-32 -Wattributes -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations)
set_target_properties(${ZLIB_LIBRARY} PROPERTIES
OUTPUT_NAME ${ZLIB_LIBRARY}
add_extra_compiler_option(-march=i686)
endif()
+ if(APPLE)
+ add_extra_compiler_option(-Wno-semicolon-before-method-body)
+ endif()
+
# Other optimizations
if(ENABLE_OMIT_FRAME_POINTER)
add_extra_compiler_option(-fomit-frame-pointer)
endif(NOT ENABLE_NOISY_WARNINGS)
endmacro()
+# Adds -fobjc-exceptions to the COMPILE_FLAGS of every Objective-C++ (*.mm)
+# source listed in OPENCV_MODULE_<the_module>_SOURCES, but only when the
+# OBJCXX flag check reports that the option is supported.
+# Flags that are already set on a source file are preserved.
+# Usage:
+#   add_apple_compiler_options(<module_name>)
+macro(add_apple_compiler_options the_module)
+  ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
+  if(HAVE_OBJC_EXCEPTIONS)
+    foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
+      if("${source}" MATCHES "\\.mm$")
+        # 'flags' is NOTFOUND (false in if()) when the property is not set.
+        get_source_file_property(flags "${source}" COMPILE_FLAGS)
+        if(flags)
+          # Append to the flags read above; the previous code expanded the
+          # never-defined variable '_flags' and thereby dropped them.
+          set(flags "${flags} -fobjc-exceptions")
+        else()
+          set(flags "-fobjc-exceptions")
+        endif()
+
+        set_source_files_properties("${source}" PROPERTIES COMPILE_FLAGS "${flags}")
+      endif()
+    endforeach()
+  endif()
+endmacro()
+
# Provides an option that the user can optionally select.
# Can accept condition to control when option is available for user.
# Usage:
////////////////////////////////////////// stereoBM //////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
-#ifdef csize
-
#define MAX_VAL 32767
-void calcDisp(__local short * cost, __global short * disp, int uniquenessRatio, int mindisp, int ndisp, int w,
- __local int * bestDisp, __local int * bestCost, int d, int x, int y, int cols, int rows, int wsz2)
+#ifndef WSZ
+#define WSZ 2
+#endif
+
+#define WSZ2 (WSZ / 2)
+
+#ifdef DEFINE_KERNEL_STEREOBM
+
+#define DISPARITY_SHIFT 4
+#define FILTERED ((MIN_DISP - 1) << DISPARITY_SHIFT)
+
+// Writes the disparity for one pixel, or leaves it untouched when the match is rejected.
+//   cost            - pointer to this work-item's entry in the local cost table
+//   disp            - destination element in the disparity image
+//   uniquenessRatio - percentage margin added to the best cost to form the rejection threshold
+//   bestDisp/bestCost - local-memory slots holding the winning disparity index and its cost
+//   d               - disparity index handled by this work-item
+//   x, y, cols, rows - pixel position and image size used for the bounds check
+// A work-item whose cost is within the threshold while its index is more than one
+// away from the winner overwrites *bestCost with FILTERED. After the barrier, only the
+// work-item with d == best_disp writes the result, refined by the neighbouring costs
+// located 2 * BLOCK_SIZE_Y entries before and after its own.
+void calcDisp(__local short * cost, __global short * disp, int uniquenessRatio,
+ __local int * bestDisp, __local int * bestCost, int d, int x, int y, int cols, int rows)
{
- short FILTERED = (mindisp - 1)<<4;
- int best_disp = *bestDisp, best_cost = *bestCost, best_disp_back = ndisp - best_disp - 1;
+ int best_disp = *bestDisp, best_cost = *bestCost;
+ barrier(CLK_LOCAL_MEM_FENCE);
short c = cost[0];
+ int thresh = best_cost + (best_cost * uniquenessRatio / 100);
+ bool notUniq = ( (c <= thresh) && (d < (best_disp - 1) || d > (best_disp + 1) ) );
- int thresh = best_cost + (best_cost * uniquenessRatio/100);
- bool notUniq = ( (c <= thresh) && (d < (best_disp_back - 1) || d > (best_disp_back + 1) ) );
-
- if(notUniq)
+ if (notUniq)
*bestCost = FILTERED;
barrier(CLK_LOCAL_MEM_FENCE);
- if( *bestCost != FILTERED && x < cols-wsz2-mindisp && y < rows-wsz2 && d == best_disp_back)
+ if( *bestCost != FILTERED && x < cols - WSZ2 - MIN_DISP && y < rows - WSZ2 && d == best_disp)
{
- int y3 = (best_disp_back > 0) ? cost[-w] : cost[w],
- y2 = c,
- y1 = (best_disp_back < ndisp-1) ? cost[w] : cost[-w];
- int d_aprox = y3+y1-2*y2 + abs(y3-y1);
- disp[0] = (short)(((best_disp_back + mindisp)*256 + (d_aprox != 0 ? (y3-y1)*256/d_aprox : 0) + 15) >> 4);
+ int d_aprox = 0;
+ int yp =0, yn = 0;
+ // Neighbouring costs are only read when the winner is not the first or last disparity index.
+ if ((0 < best_disp) && (best_disp < NUM_DISP - 1))
+ {
+ yp = cost[-2 * BLOCK_SIZE_Y];
+ yn = cost[2 * BLOCK_SIZE_Y];
+ d_aprox = yp + yn - 2 * c + abs(yp - yn);
+ }
+ disp[0] = (short)(((best_disp + MIN_DISP)*256 + (d_aprox != 0 ? (yp - yn) * 256 / d_aprox : 0) + 15) >> 4);
}
}
-int calcLocalIdx(int x, int y, int d, int w)
-{
- return d*2*w + (w - 1 - y + x);
-}
-
-void calcNewCoordinates(int * x, int * y, int nthread)
-{
- int oldX = *x - (1-nthread), oldY = *y;
- *x = (oldX == oldY) ? (0*nthread + (oldX + 2)*(1-nthread) ) : (oldX+1)*(1-nthread) + (oldX+1)*nthread;
- *y = (oldX == oldY) ? (0*(1-nthread) + (oldY + 1)*nthread) : oldY + 1*(1-nthread);
-}
-
+// Updates a running window cost using a ring buffer of partial sums.
+// A new partial sum of WSZ absolute left/right differences is computed starting at the
+// position derived from (x, y), WSZ2 and nthread: nthread == 0 walks down one image
+// column (stride cols), nthread == 1 walks along one image row (stride 1).
+// The partial sum stored at costbuf[(*h) % WSZ] is subtracted from 'cost', the new one
+// is added and stored in that slot, and *h is advanced to that slot index plus one.
+// Returns the updated cost.
short calcCostBorder(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y, int nthread,
- int wsz2, short * costbuf, int * h, int cols, int d, short cost, int winsize)
+ short * costbuf, int *h, int cols, int d, short cost)
{
- int head = (*h)%wsz;
+ int head = (*h) % WSZ;
__global const uchar * left, * right;
- int idx = mad24(y+wsz2*(2*nthread-1), cols, x+wsz2*(1-2*nthread));
+ int idx = mad24(y + WSZ2 * (2 * nthread - 1), cols, x + WSZ2 * (1 - 2 * nthread));
left = leftptr + idx;
right = rightptr + (idx - d);
- int shift = 1*nthread + cols*(1-nthread);
short costdiff = 0;
- for(int i = 0; i < winsize; i++)
+ if (0 == nthread)
+ {
+ #pragma unroll
+ for (int i = 0; i < WSZ; i++)
+ {
+ costdiff += abs( left[0] - right[0] );
+ left += cols;
+ right += cols;
+ }
+ }
+ else // (1 == nthread)
{
- costdiff += abs( left[0] - right[0] );
- left += shift;
- right += shift;
+ #pragma unroll
+ for (int i = 0; i < WSZ; i++)
+ {
+ costdiff += abs(left[i] - right[i]);
+ }
}
cost += costdiff - costbuf[head];
costbuf[head] = costdiff;
- (*h) = (*h)%wsz + 1;
+ *h = head + 1;
return cost;
}
+// Derives a window cost from three already-known neighbouring costs.
+// Returns cost_up + cost_left - cost_up_left, corrected by four absolute left/right
+// differences sampled relative to (y - WSZ2 - 1, x - WSZ2 - 1) at offsets 0, WSZ,
+// WSZ * cols and WSZ * cols + WSZ (added, subtracted, subtracted, added respectively).
+// The right image is read d pixels to the left of the left-image position.
short calcCostInside(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y,
- int wsz2, int cols, int d, short cost_up_left, short cost_up, short cost_left,
- int winsize)
+ int cols, int d, short cost_up_left, short cost_up, short cost_left)
{
__global const uchar * left, * right;
- int idx = mad24(y-wsz2-1, cols, x-wsz2-1);
+ int idx = mad24(y - WSZ2 - 1, cols, x - WSZ2 - 1);
left = leftptr + idx;
right = rightptr + (idx - d);
- int idx2 = winsize*cols;
+ int idx2 = WSZ*cols;
uchar corrner1 = abs(left[0] - right[0]),
- corrner2 = abs(left[winsize] - right[winsize]),
+ corrner2 = abs(left[WSZ] - right[WSZ]),
corrner3 = abs(left[idx2] - right[idx2]),
- corrner4 = abs(left[idx2 + winsize] - right[idx2 + winsize]);
+ corrner4 = abs(left[idx2 + WSZ] - right[idx2 + WSZ]);
return cost_up + cost_left - cost_up_left + corrner1 -
corrner2 - corrner3 + corrner4;
}
-__kernel void stereoBM(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr,
- int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
- int preFilterCap, int textureTreshold, int uniquenessRatio, int sizeX, int sizeY, int winsize)
+__kernel void stereoBM(__global const uchar * leftptr,
+ __global const uchar * rightptr,
+ __global uchar * dispptr, int disp_step, int disp_offset,
+ int rows, int cols, // rows, cols of left and right images, not disp
+ int textureTreshold, int uniquenessRatio)
{
- int gx = get_global_id(0)*sizeX;
- int gy = get_global_id(1)*sizeY;
- int lz = get_local_id(2);
+ int lz = get_local_id(0);
+ int gx = get_global_id(1) * BLOCK_SIZE_X;
+ int gy = get_global_id(2) * BLOCK_SIZE_Y;
- int nthread = lz/ndisp;
- int d = lz%ndisp;
- int wsz2 = wsz/2;
+ int nthread = lz / NUM_DISP;
+ int disp_idx = lz % NUM_DISP;
__global short * disp;
__global const uchar * left, * right;
- __local short costFunc[csize];
+ __local short costFunc[2 * BLOCK_SIZE_Y * NUM_DISP];
+
__local short * cost;
__local int best_disp[2];
__local int best_cost[2];
best_cost[nthread] = MAX_VAL;
- best_disp[nthread] = MAX_VAL;
+ best_disp[nthread] = -1;
barrier(CLK_LOCAL_MEM_FENCE);
- short costbuf[wsz];
+ short costbuf[WSZ];
int head = 0;
- int shiftX = wsz2 + ndisp + mindisp - 1;
- int shiftY = wsz2;
+ int shiftX = WSZ2 + NUM_DISP + MIN_DISP - 1;
+ int shiftY = WSZ2;
int x = gx + shiftX, y = gy + shiftY, lx = 0, ly = 0;
- int costIdx = calcLocalIdx(lx, ly, d, sizeY);
+ int costIdx = disp_idx * 2 * BLOCK_SIZE_Y + (BLOCK_SIZE_Y - 1);
cost = costFunc + costIdx;
int tempcost = 0;
- if(x < cols-wsz2-mindisp && y < rows-wsz2)
+ if (x < cols - WSZ2 - MIN_DISP && y < rows - WSZ2)
{
- int shift = 1*nthread + cols*(1-nthread);
- for(int i = 0; i < winsize; i++)
+ if (0 == nthread)
{
- int idx = mad24(y-wsz2+i*nthread, cols, x-wsz2+i*(1-nthread));
- left = leftptr + idx;
- right = rightptr + (idx - d);
- short costdiff = 0;
- for(int j = 0; j < winsize; j++)
+ #pragma unroll
+ for (int i = 0; i < WSZ; i++)
{
- costdiff += abs( left[0] - right[0] );
- left += shift;
- right += shift;
+ int idx = mad24(y - WSZ2, cols, x - WSZ2 + i);
+ left = leftptr + idx;
+ right = rightptr + (idx - disp_idx);
+ short costdiff = 0;
+ for(int j = 0; j < WSZ; j++)
+ {
+ costdiff += abs( left[0] - right[0] );
+ left += cols;
+ right += cols;
+ }
+ costbuf[i] = costdiff;
}
- if(nthread==1)
+ }
+ else // (1 == nthread)
+ {
+ #pragma unroll
+ for (int i = 0; i < WSZ; i++)
{
+ int idx = mad24(y - WSZ2 + i, cols, x - WSZ2);
+ left = leftptr + idx;
+ right = rightptr + (idx - disp_idx);
+ short costdiff = 0;
+ for (int j = 0; j < WSZ; j++)
+ {
+ costdiff += abs( left[j] - right[j]);
+ }
tempcost += costdiff;
+ costbuf[i] = costdiff;
}
- costbuf[head] = costdiff;
- head++;
}
}
- if(nthread==1)
+ if (nthread == 1)
{
cost[0] = tempcost;
- atomic_min(best_cost+nthread, tempcost);
+ atomic_min(best_cost + 1, tempcost);
}
barrier(CLK_LOCAL_MEM_FENCE);
- if(best_cost[1] == tempcost)
- atomic_min(best_disp + 1, ndisp - d - 1);
+ if (best_cost[1] == tempcost)
+ atomic_max(best_disp + 1, disp_idx);
barrier(CLK_LOCAL_MEM_FENCE);
- int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
+ int dispIdx = mad24(gy, disp_step, mad24((int)sizeof(short), gx, disp_offset));
disp = (__global short *)(dispptr + dispIdx);
- calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
- best_disp + 1, best_cost+1, d, x, y, cols, rows, wsz2);
+ calcDisp(cost, disp, uniquenessRatio, best_disp + 1, best_cost + 1, disp_idx, x, y, cols, rows);
barrier(CLK_LOCAL_MEM_FENCE);
lx = 1 - nthread;
ly = nthread;
- for(int i = 0; i < sizeY*sizeX/2; i++)
+ for (int i = 0; i < BLOCK_SIZE_Y * BLOCK_SIZE_X / 2; i++)
{
- x = (lx < sizeX) ? gx + shiftX + lx : cols;
- y = (ly < sizeY) ? gy + shiftY + ly : rows;
+ x = (lx < BLOCK_SIZE_X) ? gx + shiftX + lx : cols;
+ y = (ly < BLOCK_SIZE_Y) ? gy + shiftY + ly : rows;
best_cost[nthread] = MAX_VAL;
- best_disp[nthread] = MAX_VAL;
+ best_disp[nthread] = -1;
barrier(CLK_LOCAL_MEM_FENCE);
- costIdx = calcLocalIdx(lx, ly, d, sizeY);
+ costIdx = mad24(2 * BLOCK_SIZE_Y, disp_idx, (BLOCK_SIZE_Y - 1 - ly + lx));
+ if (0 > costIdx)
+ costIdx = BLOCK_SIZE_Y - 1;
cost = costFunc + costIdx;
-
- if(x < cols-wsz2-mindisp && y < rows-wsz2 )
+ if (x < cols - WSZ2 - MIN_DISP && y < rows - WSZ2)
{
- tempcost = ( ly*(1-nthread) + lx*nthread == 0 ) ?
- calcCostBorder(leftptr, rightptr, x, y, nthread, wsz2, costbuf, &head, cols, d,
- cost[2*nthread-1], winsize) :
- calcCostInside(leftptr, rightptr, x, y, wsz2, cols, d,
- cost[0], cost[1], cost[-1], winsize);
+ tempcost = (ly * (1 - nthread) + lx * nthread == 0) ?
+ calcCostBorder(leftptr, rightptr, x, y, nthread, costbuf, &head, cols, disp_idx, cost[2*nthread-1]) :
+ calcCostInside(leftptr, rightptr, x, y, cols, disp_idx, cost[0], cost[1], cost[-1]);
}
cost[0] = tempcost;
atomic_min(best_cost + nthread, tempcost);
barrier(CLK_LOCAL_MEM_FENCE);
- if(best_cost[nthread] == tempcost)
- atomic_min(best_disp + nthread, ndisp - d - 1);
+ if (best_cost[nthread] == tempcost)
+ atomic_max(best_disp + nthread, disp_idx);
barrier(CLK_LOCAL_MEM_FENCE);
- int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
+ dispIdx = mad24(gy + ly, disp_step, mad24((int)sizeof(short), (gx + lx), disp_offset));
disp = (__global short *)(dispptr + dispIdx);
- calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
- best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2);
+ calcDisp(cost, disp, uniquenessRatio, best_disp + nthread, best_cost + nthread, disp_idx, x, y, cols, rows);
+
barrier(CLK_LOCAL_MEM_FENCE);
- calcNewCoordinates(&lx, &ly, nthread);
+ if (lx + nthread - 1 == ly)
+ {
+ lx = (lx + nthread + 1) * (1 - nthread);
+ ly = (ly + 1) * nthread;
+ }
+ else
+ {
+ lx += nthread;
+ ly = ly - nthread + 1;
+ }
}
}
-
-#endif
+#endif //DEFINE_KERNEL_STEREOBM
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Norm Prefiler ////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void prefilter_norm(__global unsigned char *input, __global unsigned char *output,
- int rows, int cols, int prefilterCap, int winsize, int scale_g, int scale_s)
+ int rows, int cols, int prefilterCap, int scale_g, int scale_s)
{
+ // prefilterCap in range 1..63, checked in StereoBMImpl::compute
+
int x = get_global_id(0);
int y = get_global_id(1);
- int wsz2 = winsize/2;
if(x < cols && y < rows)
{
input[y * cols + max(x-1,0)] * 1 + input[ y * cols + x] * 4 + input[y * cols + min(x+1, cols-1)] * 1 +
input[min(y+1, rows-1) * cols + x] * 1;
int cov2 = 0;
- for(int i = -wsz2; i < wsz2+1; i++)
- for(int j = -wsz2; j < wsz2+1; j++)
+ for(int i = -WSZ2; i < WSZ2+1; i++)
+ for(int j = -WSZ2; j < WSZ2+1; j++)
cov2 += input[clamp(y+i, 0, rows-1) * cols + clamp(x+j, 0, cols-1)];
int res = (cov1*scale_g - cov2*scale_s)>>10;
- res = min(clamp(res, -prefilterCap, prefilterCap) + prefilterCap, 255);
- output[y * cols + x] = res & 0xFF;
+ res = clamp(res, -prefilterCap, prefilterCap) + prefilterCap;
+ output[y * cols + x] = res;
}
}
__kernel void prefilter_xsobel(__global unsigned char *input, __global unsigned char *output,
int rows, int cols, int prefilterCap)
{
+ // prefilterCap in range 1..63, checked in StereoBMImpl::compute
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols && y < rows)
{
- output[y * cols + x] = min(prefilterCap, 255) & 0xFF;
- }
-
- if(x < cols && y < rows && x > 0 && !((y == rows-1)&(rows%2==1) ) )
- {
- int cov = input[ ((y > 0) ? y-1 : y+1) * cols + (x-1)] * (-1) + input[ ((y > 0) ? y-1 : y+1) * cols + ((x<cols-1) ? x+1 : x-1)] * (1) +
- input[ (y) * cols + (x-1)] * (-2) + input[ (y) * cols + ((x<cols-1) ? x+1 : x-1)] * (2) +
- input[((y<rows-1)?(y+1):(y-1))* cols + (x-1)] * (-1) + input[((y<rows-1)?(y+1):(y-1))* cols + ((x<cols-1) ? x+1 : x-1)] * (1);
+ if (0 < x && !((y == rows-1) & (rows%2==1) ) )
+ {
+ int cov = input[ ((y > 0) ? y-1 : y+1) * cols + (x-1)] * (-1) + input[ ((y > 0) ? y-1 : y+1) * cols + ((x<cols-1) ? x+1 : x-1)] * (1) +
+ input[ (y) * cols + (x-1)] * (-2) + input[ (y) * cols + ((x<cols-1) ? x+1 : x-1)] * (2) +
+ input[((y<rows-1)?(y+1):(y-1))* cols + (x-1)] * (-1) + input[((y<rows-1)?(y+1):(y-1))* cols + ((x<cols-1) ? x+1 : x-1)] * (1);
- cov = min(clamp(cov, -prefilterCap, prefilterCap) + prefilterCap, 255);
- output[y * cols + x] = cov & 0xFF;
+ cov = clamp(cov, -prefilterCap, prefilterCap) + prefilterCap;
+ output[y * cols + x] = cov;
+ }
+ else
+ output[y * cols + x] = prefilterCap;
}
-}
+}
\ No newline at end of file
static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsize, int prefilterCap)
{
- ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc);
+ ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc, cv::format("-D WSZ=%d", winsize));
if(k.empty())
return false;
size_t globalThreads[3] = { input.cols, input.rows, 1 };
k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols,
- prefilterCap, winsize, scale_g, scale_s);
+ prefilterCap, scale_g, scale_s);
return k.run(2, globalThreads, NULL, false);
}
int wsz = state->SADWindowSize;
int wsz2 = wsz/2;
- int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2;
+ ocl::Device devDef = ocl::Device::getDefault();
+ int sizeX = devDef.isIntel() ? 32 : std::max(11, 27 - devDef.maxComputeUnits()),
+ sizeY = sizeX - 1,
+ N = ndisp * 2;
- ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) );
+ cv::String opt = cv::format("-D DEFINE_KERNEL_STEREOBM -D MIN_DISP=%d -D NUM_DISP=%d"
+ " -D BLOCK_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D WSZ=%d",
+ mindisp, ndisp,
+ sizeX, sizeY, wsz);
+ ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, opt);
if(k.empty())
return false;
int cols = left.cols, rows = left.rows;
_disp.create(_left.size(), CV_16S);
- _disp.setTo((mindisp - 1)<<4);
+ _disp.setTo((mindisp - 1) << 4);
Rect roi = Rect(Point(wsz2 + mindisp + ndisp - 1, wsz2), Point(cols-wsz2-mindisp, rows-wsz2) );
UMat disp = (_disp.getUMat())(roi);
- int globalX = disp.cols/sizeX, globalY = disp.rows/sizeY;
- globalX += (disp.cols%sizeX) > 0 ? 1 : 0;
- globalY += (disp.rows%sizeY) > 0 ? 1 : 0;
- size_t globalThreads[3] = { globalX, globalY, N};
- size_t localThreads[3] = {1, 1, N};
+ int globalX = (disp.cols + sizeX - 1) / sizeX,
+ globalY = (disp.rows + sizeY - 1) / sizeY;
+ size_t globalThreads[3] = {N, globalX, globalY};
+ size_t localThreads[3] = {N, 1, 1};
int idx = 0;
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
idx = k.set(idx, ocl::KernelArg::WriteOnlyNoSize(disp));
idx = k.set(idx, rows);
idx = k.set(idx, cols);
- idx = k.set(idx, mindisp);
- idx = k.set(idx, ndisp);
- idx = k.set(idx, state->preFilterCap);
idx = k.set(idx, state->textureThreshold);
idx = k.set(idx, state->uniquenessRatio);
- idx = k.set(idx, sizeX);
- idx = k.set(idx, sizeY);
- idx = k.set(idx, wsz);
-
return k.run(3, globalThreads, localThreads, false);
}
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+CV_EXPORTS int checkOptimalVectorWidth(int *vectorWidths,
+ InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
+ InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
+ InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+ OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
+
// with OCL_VECTOR_MAX strategy
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
const ocl::Device & d = ocl::Device::getDefault();
- int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F),
- kercn = ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = d.isIntel() ? 4 : 1;
+
bool doubleSupport = d.doubleFPConfig() > 0;
Size size = _src1.size();
-
+ int depth = CV_MAT_DEPTH(type);
if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
return false;
+ _dst.create(size, type);
+ int cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
+ int kercn = ocl::predictOptimalVectorWidthMax(_src1, _src2, _dst),
+ rowsPerWI = d.isIntel() ? 4 : 1;
+
char cvt[2][50];
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
if (k.empty())
return false;
- UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
- _dst.create(size, type);
- UMat dst = _dst.getUMat();
+ UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
u->markDeviceMemMapped(false);
CV_Assert( (retval = clEnqueueUnmapMemObject(q,
(cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
+ if (Device::getDefault().isAMD())
+ {
+ // required for multithreaded applications (see stitching test)
+ CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
+ }
u->data = 0;
}
else if( u->copyOnMap() && u->deviceCopyObsolete() )
OclVectorStrategy strat)
{
const ocl::Device & d = ocl::Device::getDefault();
- int ref_type = src1.type();
int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
}
+ return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
+}
+
+int checkOptimalVectorWidth(int *vectorWidths,
+ InputArray src1, InputArray src2, InputArray src3,
+ InputArray src4, InputArray src5, InputArray src6,
+ InputArray src7, InputArray src8, InputArray src9,
+ OclVectorStrategy strat)
+{
+ int ref_type = src1.type();
+
std::vector<size_t> offsets, steps, cols;
std::vector<int> dividers, kercns;
PROCESS_SRC(src1);
static bool isFormatSupported(cl_image_format format)
{
+ if (!haveOpenCL())
+ CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
+
cl_context context = (cl_context)Context::getDefault().ptr();
// Figure out how many formats are supported by this context.
cl_uint numFormats = 0;
void init(const UMat &src, bool norm, bool alias)
{
+ if (!haveOpenCL())
+ CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
+
+ CV_Assert(!src.empty());
CV_Assert(ocl::Device::getDefault().imageSupport());
int err, depth = src.depth(), cn = src.channels();
if (!isFormatSupported(format))
CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
+ if (alias && !src.handle(ACCESS_RW))
+ CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
+
cl_context context = (cl_context)Context::getDefault().ptr();
cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
{
bool ret = false;
const Device & d = ocl::Device::getDefault();
- if (d.imageFromBufferSupport())
+ if (d.imageFromBufferSupport() && !m.empty())
{
// This is the required pitch alignment in pixels
uint pitchAlign = d.imagePitchAlignment();
static void block_function(void* context, size_t index)
{
ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
- (*ptr_body)(cv::Range(index, index + 1));
+ (*ptr_body)(cv::Range((int)index, (int)index + 1));
}
#elif defined HAVE_CONCURRENCY
class ProxyLoopBody : public ParallelLoopBodyWrapper
int val, is_hex = d == 'x';
c = ptr[3];
ptr[3] = '\0';
- val = strtol( ptr + is_hex, &endptr, is_hex ? 8 : 16 );
+ val = (int)strtol( ptr + is_hex, &endptr, is_hex ? 8 : 16 );
ptr[3] = c;
if( endptr == ptr + is_hex )
buf[len++] = 'x';
// find the last occurence of </opencv_storage>
for(;;)
{
- int line_offset = ftell( fs->file );
+ int line_offset = (int)ftell( fs->file );
char* ptr0 = icvGets( fs, xml_buf, xml_buf_size ), *ptr;
if( !ptr0 )
break;
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+// Copyright (C) 2014, Itseez, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../test_precomp.hpp"
+#include "opencv2/ts/ocl_test.hpp"
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest {
+namespace ocl {
+
+// Checks that Image2D::canCreateAlias() returns false for a default-constructed UMat.
+// Without an OpenCL runtime the check is not run and a notice is printed instead.
+TEST(Image2D, createAliasEmptyUMat)
+{
+ if (cv::ocl::haveOpenCL())
+ {
+ UMat um;
+ EXPECT_FALSE(cv::ocl::Image2D::canCreateAlias(um));
+ }
+ else
+ std::cout << "OpenCL runtime not found. Test skipped." << std::endl;
+}
+
+// Checks that constructing an Image2D from a default-constructed UMat throws.
+// Without an OpenCL runtime the check is not run and a notice is printed instead.
+TEST(Image2D, createImage2DWithEmptyUMat)
+{
+ if (cv::ocl::haveOpenCL())
+ {
+ UMat um;
+ EXPECT_ANY_THROW(cv::ocl::Image2D image(um));
+ }
+ else
+ std::cout << "OpenCL runtime not found. Test skipped." << std::endl;
+}
+
+// Checks that an aliasing Image2D can be built on top of a 128x128 CV_8UC1 UMat without
+// throwing. The body runs only when the default device reports version 1.2 or newer;
+// the constructor is exercised only if both isFormatSupported() and canCreateAlias()
+// return true, otherwise a skip notice is printed.
+TEST(Image2D, createAlias)
+{
+ if (cv::ocl::haveOpenCL())
+ {
+ const cv::ocl::Device & d = cv::ocl::Device::getDefault();
+ int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
+
+ // aliases are an OpenCL 1.2 extension
+ if (1 < major || (1 == major && 2 <= minor))
+ {
+ UMat um(128, 128, CV_8UC1);
+ bool isFormatSupported = false, canCreateAlias = false;
+
+ EXPECT_NO_THROW(isFormatSupported = cv::ocl::Image2D::isFormatSupported(CV_8U, 1, false));
+ EXPECT_NO_THROW(canCreateAlias = cv::ocl::Image2D::canCreateAlias(um));
+
+ if (isFormatSupported && canCreateAlias)
+ {
+ EXPECT_NO_THROW(cv::ocl::Image2D image(um, false, true));
+ }
+ else
+ std::cout << "Impossible to create alias for selected image. Test skipped." << std::endl;
+ }
+ }
+ else
+ std::cout << "OpenCL runtime not found. Test skipped" << std::endl;
+}
+
+// Checks that Image2D still works after setUseOpenCL(false) has been called:
+// isFormatSupported() must not throw and, if the format is supported, constructing an
+// Image2D from a UMat that was created while OpenCL use was enabled must not throw either.
+// The useOpenCL() value read at the start is restored at the end of the test.
+TEST(Image2D, turnOffOpenCL)
+{
+ if (cv::ocl::haveOpenCL())
+ {
+ // save the current state
+ bool useOCL = cv::ocl::useOpenCL();
+ bool isFormatSupported = false;
+
+ cv::ocl::setUseOpenCL(true);
+ UMat um(128, 128, CV_8UC1);
+
+ cv::ocl::setUseOpenCL(false);
+ EXPECT_NO_THROW(isFormatSupported = cv::ocl::Image2D::isFormatSupported(CV_8U, 1, true));
+
+ if (isFormatSupported)
+ {
+ EXPECT_NO_THROW(cv::ocl::Image2D image(um));
+ }
+ else
+ std::cout << "CV_8UC1 is not supported for OpenCL images. Test skipped." << std::endl;
+
+ // reset state to the previous one
+ cv::ocl::setUseOpenCL(useOCL);
+ }
+ else
+ std::cout << "OpenCL runtime not found. Test skipped." << std::endl;
+}
+
+} } // namespace cvtest::ocl
+
+#endif // HAVE_OPENCL
\ No newline at end of file
m.create(rows, cols, type);
}
-
-template < int BLOCK_SIZE, int MAX_DESC_LEN >
-static bool ocl_matchUnrolledCached(InputArray _query, InputArray _train,
- const UMat &trainIdx, const UMat &distance, int distType)
+static bool ocl_matchSingle(InputArray query, InputArray train,
+ UMat &trainIdx, UMat &distance, int distType)
{
- int depth = _query.depth();
- cv::String opts;
- opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
- ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN );
- ocl::Kernel k("BruteForceMatch_UnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
+ if (query.empty() || train.empty())
return false;
- size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, (void *)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, (int)query.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
+ const int query_rows = query.rows();
+ const int query_cols = query.cols();
-template < int BLOCK_SIZE >
-static bool ocl_match(InputArray _query, InputArray _train,
- const UMat &trainIdx, const UMat &distance, int distType)
-{
- int depth = _query.depth();
- cv::String opts;
- opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
- ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
- ocl::Kernel k("BruteForceMatch_Match", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
- return false;
+ ensureSizeIsEnough(1, query_rows, CV_32S, trainIdx);
+ ensureSizeIsEnough(1, query_rows, CV_32F, distance);
- size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ ocl::Device devDef = ocl::Device::getDefault();
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, (void *)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, (int)query.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
+ UMat uquery = query.getUMat(), utrain = train.getUMat();
+ int kercn = 1;
+ if (devDef.isIntel() &&
+ (0 == (uquery.step % 4)) && (0 == (uquery.cols % 4)) && (0 == (uquery.offset % 4)) &&
+ (0 == (utrain.step % 4)) && (0 == (utrain.cols % 4)) && (0 == (utrain.offset % 4)))
+ kercn = 4;
-static bool ocl_matchDispatcher(InputArray query, InputArray train,
- const UMat &trainIdx, const UMat &distance, int distType)
-{
- int query_cols = query.size().width;
- bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
+ int block_size = 16;
+ int max_desc_len = 0;
+ bool is_cpu = devDef.type() == ocl::Device::TYPE_CPU;
if (query_cols <= 64)
- {
- if(!ocl_matchUnrolledCached<16, 64>(query, train, trainIdx, distance, distType)) return false;
- }
+ max_desc_len = 64 / kercn;
else if (query_cols <= 128 && !is_cpu)
- {
- if(!ocl_matchUnrolledCached<16, 128>(query, train, trainIdx, distance, distType)) return false;
- }
- else
- {
- if(!ocl_match<16>(query, train, trainIdx, distance, distType)) return false;
- }
- return true;
-}
+ max_desc_len = 128 / kercn;
-static bool ocl_matchSingle(InputArray query, InputArray train,
- UMat &trainIdx, UMat &distance, int dstType)
-{
- if (query.empty() || train.empty())
+ int depth = query.depth();
+ cv::String opts;
+ opts = cv::format("-D T=%s -D TN=%s -D kercn=%d %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ ocl::typeToStr(depth), ocl::typeToStr(CV_MAKETYPE(depth, kercn)), kercn, depth == CV_32F ? "-D T_FLOAT" : "", distType, block_size, max_desc_len);
+ ocl::Kernel k("BruteForceMatch_Match", ocl::features2d::brute_force_match_oclsrc, opts);
+ if(k.empty())
return false;
- int query_rows = query.size().height;
-
- ensureSizeIsEnough(1, query_rows, CV_32S, trainIdx);
- ensureSizeIsEnough(1, query_rows, CV_32F, distance);
-
- return ocl_matchDispatcher(query, train, trainIdx, distance, dstType);
+ size_t globalSize[] = {(query.size().height + block_size - 1) / block_size * block_size, block_size};
+ size_t localSize[] = {block_size, block_size};
+
+ int idx = 0;
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(utrain));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
+ idx = k.set(idx, uquery.rows);
+ idx = k.set(idx, uquery.cols);
+ idx = k.set(idx, utrain.rows);
+ idx = k.set(idx, utrain.cols);
+ idx = k.set(idx, (int)(uquery.step / sizeof(float)));
+
+ return k.run(2, globalSize, localSize, false);
}
static bool ocl_matchConvert(const Mat &trainIdx, const Mat &distance, std::vector< std::vector<DMatch> > &matches)
return ocl_matchConvert(trainIdxCPU, distanceCPU, matches);
}
-template < int BLOCK_SIZE, int MAX_DESC_LEN >
-static bool ocl_knn_matchUnrolledCached(InputArray _query, InputArray _train,
- const UMat &trainIdx, const UMat &distance, int distType)
+static bool ocl_knnMatchSingle(InputArray query, InputArray train, UMat &trainIdx,
+ UMat &distance, int distType) // builds and launches the BruteForceMatch_knnMatch kernel; returns false for empty inputs or an empty kernel, otherwise the result of k.run()
{
- int depth = _query.depth();
- cv::String opts;
- opts = cv::format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
- ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN );
- ocl::Kernel k("BruteForceMatch_knnUnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
+ if (query.empty() || train.empty())
return false;
- size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, (void *)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, (int)query.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
+ const int query_rows = query.rows();
+ const int query_cols = query.cols();
-template < int BLOCK_SIZE >
-static bool ocl_knn_match(InputArray _query, InputArray _train,
- const UMat &trainIdx, const UMat &distance, int distType)
-{
- int depth = _query.depth();
- cv::String opts;
- opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
- ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
- ocl::Kernel k("BruteForceMatch_knnMatch", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
- return false;
+ ensureSizeIsEnough(1, query_rows, CV_32SC2, trainIdx); // requested shape: 1 x query_rows, two int channels per query
+ ensureSizeIsEnough(1, query_rows, CV_32FC2, distance); // requested shape: 1 x query_rows, two float channels per query
- size_t globalSize[] = {(_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ trainIdx.setTo(Scalar::all(-1)); // every index slot starts out as -1
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, (void*)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, (int)query.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
+ ocl::Device devDef = ocl::Device::getDefault(); // queried below for isIntel() and type()
-static bool ocl_match2Dispatcher(InputArray query, InputArray train, const UMat &trainIdx, const UMat &distance, int distType)
-{
- bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
- if (query.size().width <= 64)
- {
- if(!ocl_knn_matchUnrolledCached<16, 64>(query, train, trainIdx, distance, distType))
- return false;
- }
- else if (query.size().width <= 128 && !is_cpu)
- {
- if(!ocl_knn_matchUnrolledCached<16, 128>(query, train, trainIdx, distance, distType))
- return false;
- }
- else
- {
- if(!ocl_knn_match<16>(query, train, trainIdx, distance, distType))
- return false;
- }
- return true;
-}
+ UMat uquery = query.getUMat(), utrain = train.getUMat();
+ int kercn = 1;
+ if (devDef.isIntel() &&
+ (0 == (uquery.step % 4)) && (0 == (uquery.cols % 4)) && (0 == (uquery.offset % 4)) &&
+ (0 == (utrain.step % 4)) && (0 == (utrain.cols % 4)) && (0 == (utrain.offset % 4)))
+ kercn = 4; // 4-wide kernel variant: Intel device and step, cols, offset of both matrices are multiples of 4
-static bool ocl_kmatchDispatcher(InputArray query, InputArray train, const UMat &trainIdx,
- const UMat &distance, int distType)
-{
- return ocl_match2Dispatcher(query, train, trainIdx, distance, distType);
-}
+ int block_size = 16;
+ int max_desc_len = 0; // stays 0 unless one of the column-count cases below applies
+ bool is_cpu = devDef.type() == ocl::Device::TYPE_CPU;
+ if (query_cols <= 64)
+ max_desc_len = 64 / kercn;
+ else if (query_cols <= 128 && !is_cpu)
+ max_desc_len = 128 / kercn;
-static bool ocl_knnMatchSingle(InputArray query, InputArray train, UMat &trainIdx,
- UMat &distance, int dstType)
-{
- if (query.empty() || train.empty())
+ int depth = query.depth();
+ cv::String opts;
+ opts = cv::format("-D T=%s -D TN=%s -D kercn=%d %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
+ ocl::typeToStr(depth), ocl::typeToStr(CV_MAKETYPE(depth, kercn)), kercn, depth == CV_32F ? "-D T_FLOAT" : "", distType, block_size, max_desc_len);
+ ocl::Kernel k("BruteForceMatch_knnMatch", ocl::features2d::brute_force_match_oclsrc, opts);
+ if(k.empty())
return false;
- const int nQuery = query.size().height;
-
- ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
- ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
-
- trainIdx.setTo(Scalar::all(-1));
-
- return ocl_kmatchDispatcher(query, train, trainIdx, distance, dstType);
+ size_t globalSize[] = {(query_rows + block_size - 1) / block_size * block_size, block_size}; // dim0: query_rows rounded up to a multiple of block_size
+ size_t localSize[] = {block_size, block_size};
+
+ int idx = 0;
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(utrain));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
+ idx = k.set(idx, uquery.rows);
+ idx = k.set(idx, uquery.cols);
+ idx = k.set(idx, utrain.rows);
+ idx = k.set(idx, utrain.cols);
+ idx = k.set(idx, (int)(uquery.step / sizeof(float))); // row step is passed in units of sizeof(float), not bytes
+
+ return k.run(2, globalSize, localSize, false); // NOTE(review): the trailing 'false' is presumably the sync flag - confirm against ocl::Kernel::run
}
static bool ocl_knnMatchConvert(const Mat &trainIdx, const Mat &distance, std::vector< std::vector<DMatch> > &matches, bool compactResult)
Mat trainIdxCPU = trainIdx.getMat(ACCESS_READ);
Mat distanceCPU = distance.getMat(ACCESS_READ);
- if (ocl_knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult) )
- return true;
- return false;
-}
-
-template < int BLOCK_SIZE, int MAX_DESC_LEN >
-static bool ocl_matchUnrolledCached(InputArray _query, InputArray _train, float maxDistance,
- const UMat &trainIdx, const UMat &distance, const UMat &nMatches, int distType)
-{
- int depth = _query.depth();
- cv::String opts;
- opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
- ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE, (int)MAX_DESC_LEN);
- ocl::Kernel k("BruteForceMatch_RadiusUnrollMatch", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
- return false;
-
- size_t globalSize[] = {(_train.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, maxDistance);
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(nMatches));
- idx = k.set(idx, (void*)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, trainIdx.cols);
- idx = k.set(idx, (int)query.step);
- idx = k.set(idx, (int)trainIdx.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
-
-//radius_match
-template < int BLOCK_SIZE >
-static bool ocl_radius_match(InputArray _query, InputArray _train, float maxDistance,
- const UMat &trainIdx, const UMat &distance, const UMat &nMatches, int distType)
-{
- int depth = _query.depth();
- cv::String opts;
- opts = format("-D T=%s %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", ocl::typeToStr(depth), depth == CV_32F ? "-D T_FLOAT" : "", distType, (int)BLOCK_SIZE);
- ocl::Kernel k("BruteForceMatch_RadiusMatch", ocl::features2d::brute_force_match_oclsrc, opts);
- if(k.empty())
- return false;
-
- size_t globalSize[] = {(_train.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (_query.size().height + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
-
- if(globalSize[0] != 0)
- {
- UMat query = _query.getUMat(), train = _train.getUMat();
-
- int idx = 0;
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(query));
- idx = k.set(idx, ocl::KernelArg::PtrReadOnly(train));
- idx = k.set(idx, maxDistance);
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
- idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(nMatches));
- idx = k.set(idx, (void*)NULL, smemSize);
- idx = k.set(idx, query.rows);
- idx = k.set(idx, query.cols);
- idx = k.set(idx, train.rows);
- idx = k.set(idx, train.cols);
- idx = k.set(idx, trainIdx.cols);
- idx = k.set(idx, (int)query.step);
- idx = k.set(idx, (int)trainIdx.step);
-
- return k.run(2, globalSize, localSize, false);
- }
- return true;
-}
-
-static bool ocl_rmatchDispatcher(InputArray query, InputArray train,
- UMat &trainIdx, UMat &distance, UMat &nMatches, float maxDistance, int distType)
-{
- bool is_cpu = ocl::Device::getDefault().type() == ocl::Device::TYPE_CPU;
- int query_cols = query.size().width;
- if (query_cols <= 64)
- {
- if(!ocl_matchUnrolledCached<16, 64>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
- }
- else if (query_cols <= 128 && !is_cpu)
- {
- if(!ocl_matchUnrolledCached<16, 128>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
- }
- else
- {
- if(!ocl_radius_match<16>(query, train, maxDistance, trainIdx, distance, nMatches, distType)) return false;
- }
- return true;
+ return ocl_knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
}
-
static bool ocl_radiusMatchSingle(InputArray query, InputArray train,
UMat &trainIdx, UMat &distance, UMat &nMatches, float maxDistance, int distType)
{
if (query.empty() || train.empty())
return false;
- const int nQuery = query.size().height;
- const int nTrain = train.size().height;
+ const int query_rows = query.rows();
+ const int train_rows = train.rows();
- ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches);
+ ensureSizeIsEnough(1, query_rows, CV_32SC1, nMatches); // requested shape: 1 x query_rows, one int per query
if (trainIdx.empty())
{
- ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
- ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
+ ensureSizeIsEnough(query_rows, std::max((train_rows / 100), 10), CV_32SC1, trainIdx); // per-query capacity: max(train_rows / 100, 10) columns
+ ensureSizeIsEnough(query_rows, std::max((train_rows / 100), 10), CV_32FC1, distance); // same shape as trainIdx, float values
}
nMatches.setTo(Scalar::all(0));
- return ocl_rmatchDispatcher(query, train, trainIdx, distance, nMatches, maxDistance, distType);
+ ocl::Device devDef = ocl::Device::getDefault(); // from here on: build and launch the BruteForceMatch_RadiusMatch kernel directly
+ UMat uquery = query.getUMat(), utrain = train.getUMat();
+ int kercn = 1;
+ if (devDef.isIntel() &&
+ (0 == (uquery.step % 4)) && (0 == (uquery.cols % 4)) && (0 == (uquery.offset % 4)) &&
+ (0 == (utrain.step % 4)) && (0 == (utrain.cols % 4)) && (0 == (utrain.offset % 4)))
+ kercn = 4; // 4-wide kernel variant: Intel device and step, cols, offset of both matrices are multiples of 4
+
+ int block_size = 16;
+ int depth = query.depth();
+ cv::String opts;
+ opts = cv::format("-D T=%s -D TN=%s -D kercn=%d %s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
+ ocl::typeToStr(depth), ocl::typeToStr(CV_MAKETYPE(depth, kercn)), kercn, depth == CV_32F ? "-D T_FLOAT" : "", distType, block_size);
+ ocl::Kernel k("BruteForceMatch_RadiusMatch", ocl::features2d::brute_force_match_oclsrc, opts);
+ if (k.empty())
+ return false;
+
+ size_t globalSize[] = {(train_rows + block_size - 1) / block_size * block_size, (query_rows + block_size - 1) / block_size * block_size, 1}; // dim0 spans train rows, dim1 spans query rows, each rounded up to a multiple of block_size
+ size_t localSize[] = {block_size, block_size, 1};
+
+ int idx = 0;
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(uquery));
+ idx = k.set(idx, ocl::KernelArg::PtrReadOnly(utrain));
+ idx = k.set(idx, maxDistance);
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(trainIdx));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(distance));
+ idx = k.set(idx, ocl::KernelArg::PtrWriteOnly(nMatches));
+ idx = k.set(idx, uquery.rows);
+ idx = k.set(idx, uquery.cols);
+ idx = k.set(idx, utrain.rows);
+ idx = k.set(idx, utrain.cols);
+ idx = k.set(idx, trainIdx.cols);
+ idx = k.set(idx, (int)(uquery.step / sizeof(float))); // query row step in units of sizeof(float), not bytes
+ idx = k.set(idx, (int)(trainIdx.step / sizeof(int))); // trainIdx row step in units of sizeof(int); no separate step is passed for distance
+
+ return k.run(2, globalSize, localSize, false); // NOTE(review): the trailing 'false' is presumably the sync flag - confirm against ocl::Kernel::run
}
static bool ocl_radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &_nMatches,
#define MAX_DESC_LEN 64
#endif
+#define BLOCK_SIZE_ODD (BLOCK_SIZE + 1)
+#ifndef SHARED_MEM_SZ
+# if (BLOCK_SIZE < MAX_DESC_LEN)
+# define SHARED_MEM_SZ (kercn * (BLOCK_SIZE * MAX_DESC_LEN + BLOCK_SIZE * BLOCK_SIZE))
+# else
+# define SHARED_MEM_SZ (kercn * 2 * BLOCK_SIZE_ODD * BLOCK_SIZE)
+# endif
+#endif
+
#ifndef DIST_TYPE
#define DIST_TYPE 2
#endif
// dirty fix for non-template support
-#if (DIST_TYPE == 2) // L1Dist
+#if (DIST_TYPE == 2) // L1Dist
# ifdef T_FLOAT
-# define DIST(x, y) fabs((x) - (y))
- typedef float value_type;
typedef float result_type;
+# if (8 == kercn)
+ typedef float8 value_type;
+# define DIST(x, y) {value_type d = fabs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3 + d.s4 + d.s5 + d.s6 + d.s7;}
+# elif (4 == kercn)
+ typedef float4 value_type;
+# define DIST(x, y) {value_type d = fabs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3;}
+# else
+ typedef float value_type;
+# define DIST(x, y) result += fabs((x) - (y))
+# endif
# else
-# define DIST(x, y) abs((x) - (y))
- typedef int value_type;
typedef int result_type;
+# if (8 == kercn)
+ typedef int8 value_type;
+# define DIST(x, y) {value_type d = abs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3 + d.s4 + d.s5 + d.s6 + d.s7;}
+# elif (4 == kercn)
+ typedef int4 value_type;
+# define DIST(x, y) {value_type d = abs((x) - (y)); result += d.s0 + d.s1 + d.s2 + d.s3;}
+# else
+ typedef int value_type;
+# define DIST(x, y) result += abs((x) - (y))
+# endif
# endif
-#define DIST_RES(x) (x)
+# define DIST_RES(x) (x)
#elif (DIST_TYPE == 4) // L2Dist
-#define DIST(x, y) (((x) - (y)) * ((x) - (y)))
-typedef float value_type;
-typedef float result_type;
-#define DIST_RES(x) sqrt(x)
+ typedef float result_type;
+# if (8 == kercn)
+ typedef float8 value_type;
+# define DIST(x, y) {value_type d = ((x) - (y)); result += dot(d.s0123, d.s0123) + dot(d.s4567, d.s4567);}
+# elif (4 == kercn)
+ typedef float4 value_type;
+# define DIST(x, y) {value_type d = ((x) - (y)); result += dot(d, d);}
+# else
+ typedef float value_type;
+# define DIST(x, y) {value_type d = ((x) - (y)); result = mad(d, d, result);}
+# endif
+# define DIST_RES(x) sqrt(x)
#elif (DIST_TYPE == 6) // Hamming
-//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-inline int bit1Count(int v)
-{
- v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
- v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
- return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-}
-#define DIST(x, y) bit1Count( (x) ^ (y) )
-typedef int value_type;
-typedef int result_type;
-#define DIST_RES(x) (x)
+// Hamming distance. popcount() on a vector operand returns a vector, and OpenCL C
+// has no implicit vector-to-scalar conversion, so the vectorized variants add up
+// the per-component bit counts before accumulating into the scalar `result`.
+# if (8 == kercn)
+ typedef int8 value_type;
+# define DIST(x, y) {value_type d = popcount((x) ^ (y)); result += d.s0 + d.s1 + d.s2 + d.s3 + d.s4 + d.s5 + d.s6 + d.s7;}
+# elif (4 == kercn)
+ typedef int4 value_type;
+# define DIST(x, y) {value_type d = popcount((x) ^ (y)); result += d.s0 + d.s1 + d.s2 + d.s3;}
+# else
+ typedef int value_type;
+# define DIST(x, y) result += popcount( (x) ^ (y) )
+# endif
+ typedef int result_type;
+# define DIST_RES(x) (x)
#endif
inline result_type reduce_block(
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
- result += DIST(
- s_query[lidy * BLOCK_SIZE + j],
- s_train[j * BLOCK_SIZE + lidx]);
+ DIST(s_query[lidy * BLOCK_SIZE_ODD + j], s_train[j * BLOCK_SIZE_ODD + lidx]);
}
return DIST_RES(result);
}
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
- result += DIST(
- s_query[lidy * BLOCK_SIZE + j],
- s_train[j * BLOCK_SIZE + lidx]);
+ DIST(s_query[lidy * BLOCK_SIZE_ODD + j], s_train[j * BLOCK_SIZE_ODD + lidx]);
}
- return (result);
+ return result;
}
inline result_type reduce_multi_block(
#pragma unroll
for (int j = 0 ; j < BLOCK_SIZE ; j++)
{
- result += DIST(
- s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j],
- s_train[j * BLOCK_SIZE + lidx]);
+ DIST(s_query[lidy * MAX_DESC_LEN + block_index * BLOCK_SIZE + j], s_train[j * BLOCK_SIZE + lidx]);
}
return result;
}
-/* 2dim launch, global size: dim0 is (query rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, dim1 is BLOCK_SIZE
-local size: dim0 is BLOCK_SIZE, dim1 is BLOCK_SIZE.
-*/
-__kernel void BruteForceMatch_UnrollMatch(
+__kernel void BruteForceMatch_Match(
__global T *query,
__global T *train,
- //__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
- __local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
+ const int queryIdx = mad24(BLOCK_SIZE, groupidx, lidy);
+ const int queryOffset = min(queryIdx, query_rows - 1) * step;
+ __global TN *query_vec = (__global TN *)(query + queryOffset);
+ query_cols /= kercn;
+
+ __local float sharebuffer[SHARED_MEM_SZ];
__local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
- int queryIdx = groupidx * BLOCK_SIZE + lidy;
+#if 0 < MAX_DESC_LEN
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
// load the query into local memory.
#pragma unroll
- for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
+ for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; i++)
{
- int loadx = lidx + i * BLOCK_SIZE;
- s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ const int loadx = mad24(BLOCK_SIZE, i, lidx);
+ s_query[mad24(MAX_DESC_LEN, lidy, loadx)] = loadx < query_cols ? query_vec[loadx] : 0;
}
+#else
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
+#endif
float myBestDistance = MAX_FLOAT;
int myBestTrainIdx = -1;
for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt; t++)
{
result_type result = 0;
+
+ const int trainOffset = min(mad24(BLOCK_SIZE, t, lidy), train_rows - 1) * step;
+ __global TN *train_vec = (__global TN *)(train + trainOffset);
+#if 0 < MAX_DESC_LEN
#pragma unroll
- for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
+ for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; i++)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
- const int loadx = lidx + i * BLOCK_SIZE;
- s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ const int loadx = mad24(BLOCK_SIZE, i, lidx);
+ // loadx counts TN vectors (kercn scalars each) while train_cols counts scalars, so the
+ // bound is converted to vectors; otherwise the load runs past the end of the train row.
+ s_train[mad24(BLOCK_SIZE, lidx, lidy)] = loadx < (train_cols / kercn) ? train_vec[loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
-
- result = DIST_RES(result);
-
- int trainIdx = t * BLOCK_SIZE + lidx;
-
- if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance/* && mask(queryIdx, trainIdx)*/)
- {
- myBestDistance = result;
- myBestTrainIdx = trainIdx;
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
- __local float *s_distance = (__local float*)(sharebuffer);
- __local int* s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
- //find BestMatch
- s_distance += lidy * BLOCK_SIZE;
- s_trainIdx += lidy * BLOCK_SIZE;
- s_distance[lidx] = myBestDistance;
- s_trainIdx[lidx] = myBestTrainIdx;
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- //reduce -- now all reduce implement in each threads.
- #pragma unroll
- for (int k = 0 ; k < BLOCK_SIZE; k++)
- {
- if (myBestDistance > s_distance[k])
+#else
+ for (int i = 0, endq = (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE; i < endq; i++)
{
- myBestDistance = s_distance[k];
- myBestTrainIdx = s_trainIdx[k];
- }
- }
-
- if (queryIdx < query_rows && lidx == 0)
- {
- bestTrainIdx[queryIdx] = myBestTrainIdx;
- bestDistance[queryIdx] = myBestDistance;
- }
-}
-
-__kernel void BruteForceMatch_Match(
- __global T *query,
- __global T *train,
- //__global float *mask,
- __global int *bestTrainIdx,
- __global float *bestDistance,
- __local float *sharebuffer,
- int query_rows,
- int query_cols,
- int train_rows,
- int train_cols,
- int step
-)
-{
- const int lidx = get_local_id(0);
- const int lidy = get_local_id(1);
- const int groupidx = get_group_id(0);
-
- const int queryIdx = groupidx * BLOCK_SIZE + lidy;
-
- float myBestDistance = MAX_FLOAT;
- int myBestTrainIdx = -1;
-
- __local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
- // loop
- for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
- {
- result_type result = 0;
- for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; i++)
- {
- const int loadx = lidx + i * BLOCK_SIZE;
+ const int loadx = mad24(i, BLOCK_SIZE, lidx);
//load query and train into local memory
- s_query[lidy * BLOCK_SIZE + lidx] = 0;
- s_train[lidx * BLOCK_SIZE + lidy] = 0;
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
if (loadx < query_cols)
{
- s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
- s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
-
+#endif
result = DIST_RES(result);
- const int trainIdx = t * BLOCK_SIZE + lidx;
+ const int trainIdx = mad24(BLOCK_SIZE, t, lidx);
if (queryIdx < query_rows && trainIdx < train_rows && result < myBestDistance /*&& mask(queryIdx, trainIdx)*/)
{
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
- __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
+ __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE);
//findBestMatch
- s_distance += lidy * BLOCK_SIZE;
- s_trainIdx += lidy * BLOCK_SIZE;
+ s_distance += lidy * BLOCK_SIZE_ODD;
+ s_trainIdx += lidy * BLOCK_SIZE_ODD;
s_distance[lidx] = myBestDistance;
s_trainIdx[lidx] = myBestTrainIdx;
barrier(CLK_LOCAL_MEM_FENCE);
//reduce -- now all reduce implement in each threads.
+ #pragma unroll
for (int k = 0 ; k < BLOCK_SIZE; k++)
{
if (myBestDistance > s_distance[k])
}
}
-//radius_unrollmatch
-__kernel void BruteForceMatch_RadiusUnrollMatch(
- __global T *query,
- __global T *train,
- float maxDistance,
- //__global float *mask,
- __global int *bestTrainIdx,
- __global float *bestDistance,
- __global int *nMatches,
- __local float *sharebuffer,
- int query_rows,
- int query_cols,
- int train_rows,
- int train_cols,
- int bestTrainIdx_cols,
- int step,
- int ostep
-)
-{
- const int lidx = get_local_id(0);
- const int lidy = get_local_id(1);
- const int groupidx = get_group_id(0);
- const int groupidy = get_group_id(1);
-
- const int queryIdx = groupidy * BLOCK_SIZE + lidy;
- const int trainIdx = groupidx * BLOCK_SIZE + lidx;
-
- __local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
- result_type result = 0;
- for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; ++i)
- {
- //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
- const int loadx = lidx + i * BLOCK_SIZE;
-
- s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
- s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
-
- //synchronize to make sure each elem for reduceIteration in share memory is written already.
- barrier(CLK_LOCAL_MEM_FENCE);
-
- result += reduce_block(s_query, s_train, lidx, lidy);
-
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-
- if (queryIdx < query_rows && trainIdx < train_rows &&
- convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
- {
- int ind = atom_inc(nMatches + queryIdx/*, (unsigned int) -1*/);
-
- if(ind < bestTrainIdx_cols)
- {
- bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
- bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
- }
- }
-}
-
//radius_match
__kernel void BruteForceMatch_RadiusMatch(
__global T *query,
__global T *train,
float maxDistance,
- //__global float *mask,
__global int *bestTrainIdx,
__global float *bestDistance,
__global int *nMatches,
- __local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
const int groupidx = get_group_id(0);
const int groupidy = get_group_id(1);
- const int queryIdx = groupidy * BLOCK_SIZE + lidy;
- const int trainIdx = groupidx * BLOCK_SIZE + lidx;
+ const int queryIdx = mad24(BLOCK_SIZE, groupidy, lidy);
+ const int queryOffset = min(queryIdx, query_rows - 1) * step;
+ __global TN *query_vec = (__global TN *)(query + queryOffset);
+
+ const int trainIdx = mad24(BLOCK_SIZE, groupidx, lidx);
+ const int trainOffset = min(mad24(BLOCK_SIZE, groupidx, lidy), train_rows - 1) * step;
+ __global TN *train_vec = (__global TN *)(train + trainOffset);
+ query_cols /= kercn;
+
+ __local float sharebuffer[SHARED_MEM_SZ];
__local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
result_type result = 0;
for (int i = 0 ; i < (query_cols + BLOCK_SIZE - 1) / BLOCK_SIZE ; ++i)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
- const int loadx = lidx + i * BLOCK_SIZE;
+ const int loadx = mad24(BLOCK_SIZE, i, lidx);
- s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
- s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
+
+ if (loadx < query_cols)
+ {
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
+ }
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
-
- if (queryIdx < query_rows && trainIdx < train_rows &&
- convert_float(result) < maxDistance/* && mask(queryIdx, trainIdx)*/)
+ if (queryIdx < query_rows && trainIdx < train_rows && convert_float(result) < maxDistance)
{
int ind = atom_inc(nMatches + queryIdx);
if(ind < bestTrainIdx_cols)
{
- bestTrainIdx[queryIdx * (ostep / sizeof(int)) + ind] = trainIdx;
- bestDistance[queryIdx * (ostep / sizeof(float)) + ind] = result;
+ bestTrainIdx[mad24(queryIdx, ostep, ind)] = trainIdx;
+ bestDistance[mad24(queryIdx, ostep, ind)] = result;
}
}
}
-
-__kernel void BruteForceMatch_knnUnrollMatch(
+__kernel void BruteForceMatch_knnMatch(
__global T *query,
__global T *train,
- //__global float *mask,
__global int2 *bestTrainIdx,
__global float2 *bestDistance,
- __local float *sharebuffer,
int query_rows,
int query_cols,
int train_rows,
const int lidy = get_local_id(1);
const int groupidx = get_group_id(0);
- const int queryIdx = groupidx * BLOCK_SIZE + lidy;
+ const int queryIdx = mad24(BLOCK_SIZE, groupidx, lidy);
+ const int queryOffset = min(queryIdx, query_rows - 1) * step;
+ __global TN *query_vec = (__global TN *)(query + queryOffset);
+ query_cols /= kercn;
+
+ __local float sharebuffer[SHARED_MEM_SZ];
__local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
+#if 0 < MAX_DESC_LEN
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * MAX_DESC_LEN;
// load the query into local memory.
+ #pragma unroll
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{
- int loadx = lidx + i * BLOCK_SIZE;
- s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ int loadx = mad24(BLOCK_SIZE, i, lidx);
+ s_query[mad24(MAX_DESC_LEN, lidy, loadx)] = loadx < query_cols ? query_vec[loadx] : 0;
}
+#else
+ __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE;
+#endif
float myBestDistance1 = MAX_FLOAT;
float myBestDistance2 = MAX_FLOAT;
int myBestTrainIdx1 = -1;
int myBestTrainIdx2 = -1;
- //loopUnrolledCached
- for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
+ for (int t = 0, endt = (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE; t < endt ; t++)
{
result_type result = 0;
+
+ int trainOffset = min(mad24(BLOCK_SIZE, t, lidy), train_rows - 1) * step;
+ __global TN *train_vec = (__global TN *)(train + trainOffset);
+#if 0 < MAX_DESC_LEN
+ #pragma unroll
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE ; i++)
{
//load a BLOCK_SIZE * BLOCK_SIZE block into local train.
- const int loadx = lidx + i * BLOCK_SIZE;
- s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0;
+ const int loadx = mad24(BLOCK_SIZE, i, lidx);
+ // loadx counts TN vectors (kercn scalars each) while train_cols counts scalars, so the
+ // bound is converted to vectors; otherwise the load runs past the end of the train row.
+ s_train[mad24(BLOCK_SIZE, lidx, lidy)] = loadx < (train_cols / kercn) ? train_vec[loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
-
- result = DIST_RES(result);
-
- const int trainIdx = t * BLOCK_SIZE + lidx;
-
- if (queryIdx < query_rows && trainIdx < train_rows)
- {
- if (result < myBestDistance1)
- {
- myBestDistance2 = myBestDistance1;
- myBestTrainIdx2 = myBestTrainIdx1;
- myBestDistance1 = result;
- myBestTrainIdx1 = trainIdx;
- }
- else if (result < myBestDistance2)
- {
- myBestDistance2 = result;
- myBestTrainIdx2 = trainIdx;
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- __local float *s_distance = (local float *)sharebuffer;
- __local int *s_trainIdx = (local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
- // find BestMatch
- s_distance += lidy * BLOCK_SIZE;
- s_trainIdx += lidy * BLOCK_SIZE;
-
- s_distance[lidx] = myBestDistance1;
- s_trainIdx[lidx] = myBestTrainIdx1;
-
- float bestDistance1 = MAX_FLOAT;
- float bestDistance2 = MAX_FLOAT;
- int bestTrainIdx1 = -1;
- int bestTrainIdx2 = -1;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (lidx == 0)
- {
- for (int i = 0 ; i < BLOCK_SIZE ; i++)
- {
- float val = s_distance[i];
- if (val < bestDistance1)
- {
- bestDistance2 = bestDistance1;
- bestTrainIdx2 = bestTrainIdx1;
-
- bestDistance1 = val;
- bestTrainIdx1 = s_trainIdx[i];
- }
- else if (val < bestDistance2)
- {
- bestDistance2 = val;
- bestTrainIdx2 = s_trainIdx[i];
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- s_distance[lidx] = myBestDistance2;
- s_trainIdx[lidx] = myBestTrainIdx2;
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (lidx == 0)
- {
- for (int i = 0 ; i < BLOCK_SIZE ; i++)
- {
- float val = s_distance[i];
-
- if (val < bestDistance2)
- {
- bestDistance2 = val;
- bestTrainIdx2 = s_trainIdx[i];
- }
- }
- }
-
- myBestDistance1 = bestDistance1;
- myBestDistance2 = bestDistance2;
-
- myBestTrainIdx1 = bestTrainIdx1;
- myBestTrainIdx2 = bestTrainIdx2;
-
- if (queryIdx < query_rows && lidx == 0)
- {
- bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
- bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
- }
-}
-
-__kernel void BruteForceMatch_knnMatch(
- __global T *query,
- __global T *train,
- //__global float *mask,
- __global int2 *bestTrainIdx,
- __global float2 *bestDistance,
- __local float *sharebuffer,
- int query_rows,
- int query_cols,
- int train_rows,
- int train_cols,
- int step
-)
-{
- const int lidx = get_local_id(0);
- const int lidy = get_local_id(1);
- const int groupidx = get_group_id(0);
-
- const int queryIdx = groupidx * BLOCK_SIZE + lidy;
- __local value_type *s_query = (__local value_type *)sharebuffer;
- __local value_type *s_train = (__local value_type *)sharebuffer + BLOCK_SIZE * BLOCK_SIZE;
-
- float myBestDistance1 = MAX_FLOAT;
- float myBestDistance2 = MAX_FLOAT;
- int myBestTrainIdx1 = -1;
- int myBestTrainIdx2 = -1;
-
- //loop
- for (int t = 0 ; t < (train_rows + BLOCK_SIZE - 1) / BLOCK_SIZE ; t++)
- {
- result_type result = 0.0f;
- for (int i = 0 ; i < (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE ; i++)
+#else
+ for (int i = 0, endq = (query_cols + BLOCK_SIZE -1) / BLOCK_SIZE; i < endq ; i++)
{
- const int loadx = lidx + i * BLOCK_SIZE;
+ const int loadx = mad24(BLOCK_SIZE, i, lidx);
//load query and train into local memory
- s_query[lidy * BLOCK_SIZE + lidx] = 0;
- s_train[lidx * BLOCK_SIZE + lidy] = 0;
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = 0;
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = 0;
if (loadx < query_cols)
{
- s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx];
- s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx];
+ s_query[mad24(BLOCK_SIZE_ODD, lidy, lidx)] = query_vec[loadx];
+ s_train[mad24(BLOCK_SIZE_ODD, lidx, lidy)] = train_vec[loadx];
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
-
+#endif
result = DIST_RES(result);
- const int trainIdx = t * BLOCK_SIZE + lidx;
+ const int trainIdx = mad24(BLOCK_SIZE, t, lidx);
- if (queryIdx < query_rows && trainIdx < train_rows /*&& mask(queryIdx, trainIdx)*/)
+ if (queryIdx < query_rows && trainIdx < train_rows)
{
if (result < myBestDistance1)
{
barrier(CLK_LOCAL_MEM_FENCE);
__local float *s_distance = (__local float *)sharebuffer;
- __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE * BLOCK_SIZE);
-
- //findBestMatch
- s_distance += lidy * BLOCK_SIZE;
- s_trainIdx += lidy * BLOCK_SIZE;
+ __local int *s_trainIdx = (__local int *)(sharebuffer + BLOCK_SIZE_ODD * BLOCK_SIZE);
+ // find BestMatch
+ s_distance += lidy * BLOCK_SIZE_ODD;
+ s_trainIdx += lidy * BLOCK_SIZE_ODD;
s_distance[lidx] = myBestDistance1;
s_trainIdx[lidx] = myBestTrainIdx1;
bestTrainIdx[queryIdx] = (int2)(myBestTrainIdx1, myBestTrainIdx2);
bestDistance[queryIdx] = (float2)(myBestDistance1, myBestDistance2);
}
-}
-
-kernel void BruteForceMatch_calcDistanceUnrolled(
- __global T *query,
- __global T *train,
- //__global float *mask,
- __global float *allDist,
- __local float *sharebuffer,
- int query_rows,
- int query_cols,
- int train_rows,
- int train_cols,
- int step)
-{
- /* Todo */
-}
-
-kernel void BruteForceMatch_calcDistance(
- __global T *query,
- __global T *train,
- //__global float *mask,
- __global float *allDist,
- __local float *sharebuffer,
- int query_rows,
- int query_cols,
- int train_rows,
- int train_cols,
- int step)
-{
- /* Todo */
-}
-
-kernel void BruteForceMatch_findBestMatch(
- __global float *allDist,
- __global int *bestTrainIdx,
- __global float *bestDistance,
- int k
-)
-{
- /* Todo */
-}
+}
\ No newline at end of file
macro(ocv_highgui_configure_target)
if(APPLE)
- ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
- if(HAVE_OBJC_EXCEPTIONS)
- foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
- if("${source}" MATCHES "\\.mm$")
- get_source_file_property(flags "${source}" COMPILE_FLAGS)
- if(flags)
- set(flags "${_flags} -fobjc-exceptions")
- else()
- set(flags "-fobjc-exceptions")
- endif()
-
- set_source_files_properties("${source}" PROPERTIES COMPILE_FLAGS "${flags}")
- endif()
- endforeach()
- endif()
+ # add_apple_compiler_options is a macro: its argument is substituted
+ # textually into OPENCV_MODULE_<arg>_SOURCES, so pass the module name
+ # itself rather than the name of the variable that holds it.
+ add_apple_compiler_options(${the_module})
endif()
if(BUILD_SHARED_LIBS)
macro(ocv_imgcodecs_configure_target)
if(APPLE)
- ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
- if(HAVE_OBJC_EXCEPTIONS)
- foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
- if("${source}" MATCHES "\\.mm$")
- get_source_file_property(flags "${source}" COMPILE_FLAGS)
- if(flags)
- set(flags "${_flags} -fobjc-exceptions")
- else()
- set(flags "-fobjc-exceptions")
- endif()
-
- set_source_files_properties("${source}" PROPERTIES COMPILE_FLAGS "${flags}")
- endif()
- endforeach()
- endif()
+ # add_apple_compiler_options is a macro: its argument is substituted
+ # textually into OPENCV_MODULE_<arg>_SOURCES, so pass the module name
+ # itself rather than the name of the variable that holds it.
+ add_apple_compiler_options(${the_module})
endif()
if(BUILD_SHARED_LIBS)
namespace cv
{
static const char fmtSignTiffII[] = "II\x2a\x00";
-static const char fmtSignTiffMM[] = "MM\x00\x2a";
#ifdef HAVE_TIFF
+static const char fmtSignTiffMM[] = "MM\x00\x2a";
+
#include "tiff.h"
#include "tiffio.h"
#include "opencv2/core.hpp"
#include "precomp.hpp"
+UIImage* MatToUIImage(const cv::Mat& image);
+void UIImageToMat(const UIImage* image, cv::Mat& m, bool alphaExist);
+
UIImage* MatToUIImage(const cv::Mat& image) {
NSData *data = [NSData dataWithBytes:image.data
uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
uint16x8_t g1 = vandq_u16(r1, masklo);
g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
- g1 = vshlq_n_u16(vextq_u16(g1, g1, 1), 2);
+ uint16x8_t rot = vextq_u16(g1, g1, 1);
+ g1 = vshlq_n_u16(rot, 2);
// g0 = b0 b2 b4 ...
// g1 = b1 b3 b5 ...
int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);
+ int32x4_t shifted;
v_r00 = vaddq_s32(vqaddq_s32(v_r00, v_r40), vqaddq_s32(v_r20, v_r20));
v_r10 = vaddq_s32(vqaddq_s32(v_r10, v_r20), v_r30);
- int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, vshlq_n_s32(v_r10, 2)), v_delta), 8);
+
+ shifted = vshlq_n_s32(v_r10, 2);
+ int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, shifted), v_delta), 8);
v_r01 = vaddq_s32(vqaddq_s32(v_r01, v_r41), vqaddq_s32(v_r21, v_r21));
v_r11 = vaddq_s32(vqaddq_s32(v_r11, v_r21), v_r31);
- int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, vshlq_n_s32(v_r11, 2)), v_delta), 8);
+ shifted = vshlq_n_s32(v_r11, 2);
+ int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, shifted), v_delta), 8);
vst1q_u16(dst + x, vcombine_u16(vqmovun_s32(v_dst0), vqmovun_s32(v_dst1)));
}
int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);
+ int32x4_t shifted;
v_r00 = vaddq_s32(vqaddq_s32(v_r00, v_r40), vqaddq_s32(v_r20, v_r20));
v_r10 = vaddq_s32(vqaddq_s32(v_r10, v_r20), v_r30);
- int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, vshlq_n_s32(v_r10, 2)), v_delta), 8);
+ shifted = vshlq_n_s32(v_r10, 2);
+ int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r00, shifted), v_delta), 8);
v_r01 = vaddq_s32(vqaddq_s32(v_r01, v_r41), vqaddq_s32(v_r21, v_r21));
v_r11 = vaddq_s32(vqaddq_s32(v_r11, v_r21), v_r31);
- int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, vshlq_n_s32(v_r11, 2)), v_delta), 8);
+ shifted = vshlq_n_s32(v_r11, 2);
+ int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vqaddq_s32(v_r01, shifted), v_delta), 8);
vst1q_s16(dst + x, vcombine_s16(vqmovn_s32(v_dst0), vqmovn_s32(v_dst1)));
}
include/opencv2/bioinspired/retina.hpp
+include/opencv2/bioinspired/retinafasttonemapping.hpp
+include/opencv2/bioinspired/transientareassegmentationmodule.hpp
import sys, re, os.path
import logging
+from pprint import pformat
from string import Template
if sys.version_info[0] >= 3:
}, # '', i.e. no class
} # func_arg_fix
-def objdump(obj):
- attrs = ["%s='%s'" % (attr, getattr(obj, attr)) for attr in dir(obj) if not attr.startswith("__") and not hasattr(getattr(obj, attr), "__call__")]
- return "%s [%s]" % (obj.__class__.__name__, " ; ".join(attrs))
+def getLibVersion(version_hpp_path):
+    """Extract the OpenCV version from a version.hpp header.
+
+    Reads the file at version_hpp_path and returns the text captured from the
+    CV_VERSION_MAJOR, CV_VERSION_MINOR, CV_VERSION_REVISION and
+    CV_VERSION_STATUS #define lines as a (major, minor, revision, status)
+    tuple of strings. Raises AttributeError when one of the four defines is
+    missing, because re.search then returns None.
+    """
+    # Close the header deterministically instead of leaking the handle.
+    with open(version_hpp_path, "rt") as header:
+        version_file = header.read()
+
+    def find_define(name, value_pattern):
+        # Raw strings keep \W and \d intact for the regex engine. The leading
+        # \W* (previously the literal text "W*") tolerates indentation in
+        # front of the '#'.
+        pattern = r"^\W*#\W*define\W+" + name + r"\W+" + value_pattern + r"\W*$"
+        return re.search(pattern, version_file, re.MULTILINE).group(1)
+
+    major = find_define("CV_VERSION_MAJOR", r"(\d+)")
+    minor = find_define("CV_VERSION_MINOR", r"(\d+)")
+    revision = find_define("CV_VERSION_REVISION", r"(\d+)")
+    status = find_define("CV_VERSION_STATUS", r'"(.*?)"')
+    return (major, minor, revision, status)
+
+# Builds the Java source fragment that exposes the library version.
+# The version is read with getLibVersion() from
+# core/include/opencv2/core/version.hpp, located relative to this script's
+# directory (or '.' when __file__ has no directory part).
+# version_str is "major.minor.revision" followed by the status string;
+# version_suffix is the three numbers concatenated without separators and is
+# used in the native library name. Returns the formatted Java snippet.
+def libVersionBlock():
+ (major, minor, revision, status) = getLibVersion(
+ (os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
+ version_str = '.'.join( (major, minor, revision) ) + status
+ version_suffix = ''.join( (major, minor, revision) )
+ return """
+ // these constants are wrapped inside functions to prevent inlining
+ private static String getVersion() { return "%(v)s"; }
+ private static String getNativeLibraryName() { return "opencv_java%(vs)s"; }
+ private static int getVersionMajor() { return %(ma)s; }
+ private static int getVersionMinor() { return %(mi)s; }
+ private static int getVersionRevision() { return %(re)s; }
+ private static String getVersionStatus() { return "%(st)s"; }
+
+ public static final String VERSION = getVersion();
+ public static final String NATIVE_LIBRARY_NAME = getNativeLibraryName();
+ public static final int VERSION_MAJOR = getVersionMajor();
+ public static final int VERSION_MINOR = getVersionMinor();
+ public static final int VERSION_REVISION = getVersionRevision();
+ public static final String VERSION_STATUS = getVersionStatus();
+""" % { 'v' : version_str, 'vs' : version_suffix, 'ma' : major, 'mi' : minor, 're' : revision, 'st': status }
+
+
+T_JAVA_START_INHERITED = """
+//
+// This file is auto-generated. Please don't modify it!
+//
+package org.opencv.$module;
+
+$imports
+
+// C++: class $name
+//javadoc: $name
+public class $jname extends $base {
+
+ protected $jname(long addr) { super(addr); }
+
+"""
+
+T_JAVA_START_ORPHAN = """
+//
+// This file is auto-generated. Please don't modify it!
+//
+package org.opencv.$module;
+
+$imports
+
+// C++: class $name
+//javadoc: $name
+public class $jname {
+
+ protected final long nativeObj;
+ protected $jname(long addr) { nativeObj = addr; }
+
+"""
+
+T_JAVA_START_MODULE = """
+//
+// This file is auto-generated. Please don't modify it!
+//
+package org.opencv.$module;
+
+$imports
+
+public class $jname {
+"""
+
+T_CPP_MODULE = """
+//
+// This file is auto-generated, please don't edit!
+//
+
+#define LOG_TAG "org.opencv.$m"
+
+#include "common.h"
+
+#include "opencv2/opencv_modules.hpp"
+#ifdef HAVE_OPENCV_$M
+
+#include <string>
+
+#include "opencv2/$m.hpp"
+
+using namespace cv;
+
+/// throw java exception
+static void throwJavaException(JNIEnv *env, const std::exception *e, const char *method) {
+ std::string what = "unknown exception";
+ jclass je = 0;
+
+ if(e) {
+ std::string exception_type = "std::exception";
+
+ if(dynamic_cast<const cv::Exception*>(e)) {
+ exception_type = "cv::Exception";
+ je = env->FindClass("org/opencv/core/CvException");
+ }
+
+ what = exception_type + ": " + e->what();
+ }
+
+ if(!je) je = env->FindClass("java/lang/Exception");
+ env->ThrowNew(je, what.c_str());
+
+ LOGE("%s caught %s", method, what.c_str());
+ (void)method; // avoid "unused" warning
+}
-class DebuggedObject(object):
- def __init__(self):
- pass
- def __str__(self):
- return objdump(self)
-class GeneralInfo(DebuggedObject):
+extern "C" {
+
+$code
+
+} // extern "C"
+
+#endif // HAVE_OPENCV_$M
+"""
+
+class GeneralInfo():
def __init__(self, name, namespaces):
- DebuggedObject.__init__(self)
self.namespace, self.classpath, self.classname, self.name = self.parseName(name, namespaces)
def parseName(self, name, namespaces):
result = ".".join([f for f in [self.namespace] + self.classpath.split(".") if len(f)>0])
return result if not isCPP else result.replace(".", "::")
-def getLibVersion(version_hpp_path):
- version_file = open(version_hpp_path, "rt").read()
- major = re.search("^W*#\W*define\W+CV_VERSION_MAJOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
- minor = re.search("^W*#\W*define\W+CV_VERSION_MINOR\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
- revision = re.search("^W*#\W*define\W+CV_VERSION_REVISION\W+(\d+)\W*$", version_file, re.MULTILINE).group(1)
- status = re.search("^W*#\W*define\W+CV_VERSION_STATUS\W+\"(.*?)\"\W*$", version_file, re.MULTILINE).group(1)
- return (major, minor, revision, status)
-
class ConstInfo(GeneralInfo):
def __init__(self, decl, addedManually=False, namespaces=[]):
GeneralInfo.__init__(self, decl[0], namespaces)
self.value = decl[1]
self.addedManually = addedManually
-class ClassPropInfo(DebuggedObject):
+    def __repr__(self):
+        """Render as 'CONST <name>=<value>', suffixed with '(manual)' for manually added constants."""
+        if self.addedManually:
+            manual_tag = "(manual)"
+        else:
+            manual_tag = ""
+        fields = {"name": self.name, "value": self.value, "manual": manual_tag}
+        return Template("CONST $name=$value$manual").substitute(fields)
+
+    def isIgnored(self):
+        """Return True when the constant's name matches any pattern in const_ignore_list."""
+        return any(re.match(pattern, self.name) for pattern in const_ignore_list)
+
+class ClassPropInfo():
def __init__(self, decl): # [f_ctype, f_name, '', '/RW']
- DebuggedObject.__init__(self)
self.ctype = decl[0]
self.name = decl[1]
self.rw = "/RW" in decl[3]
+    def __repr__(self):
+        """Render as 'PROP <ctype> <name>'."""
+        fields = {"ctype": self.ctype, "name": self.name}
+        return Template("PROP $ctype $name").substitute(fields)
+
class ClassInfo(GeneralInfo):
def __init__(self, decl, namespaces=[]): # [ 'class/struct cname', ': base', [modlist] ]
GeneralInfo.__init__(self, decl[0], namespaces)
self.cname = self.name.replace(".", "::")
- self.methods = {}
+ self.methods = []
self.methods_suffixes = {}
self.consts = [] # using a list to save the occurence order
self.private_consts = []
self.imports = set()
self.props= []
self.jname = self.name
+ self.j_code = None # java code stream
+ self.jn_code = None # jni code stream
+ self.cpp_code = None # cpp code stream
for m in decl[2]:
if m.startswith("="):
self.jname = m[1:]
#self.base = re.sub(r"\b"+self.jname+r"\b", "", decl[1].replace(":", "")).strip()
self.base = re.sub(r"^.*:", "", decl[1].split(",")[0]).strip().replace(self.jname, "")
-class ArgInfo(DebuggedObject):
+    def __repr__(self):
+        """Render as 'CLASS <namespace>.<classpath>.<name> : <base>' from the instance attributes."""
+        layout = Template("CLASS $namespace.$classpath.$name : $base")
+        return layout.substitute(vars(self))
+
+    def getAllImports(self, module):
+        """Return sorted Java import statements, omitting names that start with 'org.opencv.<module>'."""
+        own_prefix = 'org.opencv.' + module
+        statements = []
+        for qualified_name in sorted(self.imports):
+            if qualified_name.startswith(own_prefix):
+                continue
+            statements.append("import %s;" % qualified_name)
+        return statements
+
+    def addImports(self, ctype):
+        """Register the Java imports this class needs in order to use C++ type `ctype`.
+
+        Nested vectors and plain vectors add the Mat / List / ArrayList /
+        Converters helpers and then recurse on the element type (a vector whose
+        Java type starts with 'MatOf' imports that class instead). Any other
+        type is mapped through type_dict, or taken verbatim for "Algorithm",
+        and imported from org.opencv.core when it is one of the listed core
+        classes; a Java type of 'String' adds java.lang.String.
+        """
+        if ctype.startswith('vector_vector'):
+            self.imports.add("org.opencv.core.Mat")
+            self.imports.add("org.opencv.utils.Converters")
+            self.imports.add("java.util.List")
+            self.imports.add("java.util.ArrayList")
+            self.addImports(ctype.replace('vector_vector', 'vector'))
+        elif ctype.startswith('vector'):
+            self.imports.add("org.opencv.core.Mat")
+            self.imports.add('java.util.ArrayList')
+            if type_dict[ctype]['j_type'].startswith('MatOf'):
+                self.imports.add("org.opencv.core." + type_dict[ctype]['j_type'])
+            else:
+                self.imports.add("java.util.List")
+                self.imports.add("org.opencv.utils.Converters")
+                self.addImports(ctype.replace('vector_', ''))
+        else:
+            j_type = ''
+            if ctype in type_dict:
+                j_type = type_dict[ctype]['j_type']
+            elif ctype == "Algorithm":
+                # Exact comparison: `in ("Algorithm")` was a substring test on
+                # a plain string, not membership in a one-element tuple.
+                j_type = ctype
+            if j_type in ( "CvType", "Mat", "Point", "Point3", "Range", "Rect", "RotatedRect", "Scalar", "Size", "TermCriteria", "Algorithm" ):
+                self.imports.add("org.opencv.core." + j_type)
+            if j_type == 'String':
+                self.imports.add("java.lang.String")
+
+    def getAllMethods(self):
+        """Return the methods in sorted order, with constructors placed before all other methods."""
+        ordered = sorted(self.methods)
+        constructors = [method for method in ordered if method.isconstructor]
+        others = [method for method in ordered if not method.isconstructor]
+        return constructors + others
+
+    def addMethod(self, fi):
+        """Append `fi` to this class's method list."""
+        self.methods += [fi]
+
+    def getConst(self, name):
+        """Return the first public or private constant called `name`, or None when there is none."""
+        candidates = self.consts + self.private_consts
+        matches = (const for const in candidates if const.name == name)
+        return next(matches, None)
+
+    def addConst(self, constinfo):
+        """Store `constinfo` in private_consts when its name matches const_private_list, else in consts."""
+        is_private = any(re.match(pattern, constinfo.name) for pattern in const_private_list)
+        destination = self.private_consts if is_private else self.consts
+        destination.append(constinfo)
+
+    def initCodeStreams(self, Module):
+        """Create fresh Java, JNI and C++ output streams and write the Java file header.
+
+        The header template depends on whether this class is the module class
+        (its name equals `Module`) and, if not, whether it has a base class.
+        The 'Core' class additionally gets the library version block.
+        """
+        self.j_code, self.jn_code, self.cpp_code = StringIO(), StringIO(), StringIO()
+        if self.name == Module:
+            header = T_JAVA_START_MODULE
+        elif self.base:
+            header = T_JAVA_START_INHERITED
+        else:
+            header = T_JAVA_START_ORPHAN
+        self.j_code.write(header)
+        # Core carries the version constants, which are Java Strings.
+        if self.name == 'Core':
+            self.imports.add("java.lang.String")
+            self.j_code.write(libVersionBlock())
+
+    def cleanupCodeStreams(self):
+        """Close the Java, JNI and C++ code streams, in that order."""
+        for stream in (self.j_code, self.jn_code, self.cpp_code):
+            stream.close()
+
+    def generateJavaCode(self, m, M):
+        """Return the finished Java source for this class.
+
+        Joins the Java stream and the JNI-declaration stream, appends the
+        closing brace, and fills in the $module, $name, $jname, $imports and
+        $base placeholders.
+        """
+        # NOTE(review): M is forwarded as the import-filter prefix while the
+        # package placeholder is filled from m - confirm which one
+        # getAllImports is meant to receive.
+        java_source = self.j_code.getvalue() + "\n\n" + self.jn_code.getvalue() + "\n}\n"
+        import_lines = "\n".join(self.getAllImports(M))
+        fields = dict(module=m, name=self.name, jname=self.jname, imports=import_lines, base=self.base)
+        return Template(java_source).substitute(fields)
+
+    def generateCppCode(self):
+        """Return everything written so far to this class's C++ code stream."""
+        cpp_stream = self.cpp_code
+        return cpp_stream.getvalue()
+
+class ArgInfo():
def __init__(self, arg_tuple): # [ ctype, name, def val, [mod], argno ]
- DebuggedObject.__init__(self)
self.pointer = False
ctype = arg_tuple[0]
if ctype.endswith("*"):
if "/IO" in arg_tuple[3]:
self.out = "IO"
+    def __repr__(self):
+        """Render as 'ARG <ctype>[ *] <name>=<defval>'; the ' *' marks pointer arguments."""
+        if self.pointer:
+            pointer_mark = " *"
+        else:
+            pointer_mark = ""
+        fields = {"ctype": self.ctype, "p": pointer_mark, "name": self.name, "defval": self.defval}
+        return Template("ARG $ctype$p $name=$defval").substitute(fields)
class FuncInfo(GeneralInfo):
def __init__(self, decl, namespaces=[]): # [ funcname, return_ctype, [modifiers], [args] ]
GeneralInfo.__init__(self, decl[0], namespaces)
self.cname = self.name.replace(".", "::")
self.jname = self.name
+ self.isconstructor = self.name == self.classname
if "[" in self.name:
self.jname = "getelem"
for m in decl[2]:
arg[3] = arg_fix_map.get('attrib', arg[3]) #fixing arg attrib
self.args.append(ArgInfo(arg))
-class FuncFamilyInfo(DebuggedObject):
- def __init__(self, decl, namespaces=set()): # [ funcname, return_ctype, [modifiers], [args] ]
- DebuggedObject.__init__(self)
- self.funcs = []
- self.funcs.append( FuncInfo(decl, namespaces) )
- self.jname = self.funcs[0].jname
- self.isconstructor = self.funcs[0].name == self.funcs[0].classname
-
- def add_func(self, fi):
- self.funcs.append( fi )
-
+    def __repr__(self):
+        """Render as 'FUNC <<ctype> <namespace>.<classpath>.<name> <args>>' from the instance attributes."""
+        layout = Template("FUNC <$ctype $namespace.$classpath.$name $args>")
+        return layout.substitute(vars(self))
class JavaWrapperGenerator(object):
def __init__(self):
self.clear()
def clear(self):
- self.classes = { "Mat" : ClassInfo([ 'class Mat', '', [], [] ]) }
+ self.namespaces = set(["cv"])
+ self.classes = { "Mat" : ClassInfo([ 'class Mat', '', [], [] ], self.namespaces) }
self.module = ""
self.Module = ""
- self.java_code= {} # { class : {j_code, jn_code} }
- self.cpp_code = None
self.ported_func_list = []
self.skipped_func_list = []
self.def_args_hist = {} # { def_args_cnt : funcs_cnt }
- self.classes_map = []
- self.classes_simple = []
- self.namespaces = set(["cv"])
-
- def add_class_code_stream(self, class_name, cls_base = ''):
- jname = self.classes[class_name].jname
- self.java_code[class_name] = { "j_code" : StringIO(), "jn_code" : StringIO(), }
- if class_name != self.Module:
- if cls_base:
- self.java_code[class_name]["j_code"].write("""
-//
-// This file is auto-generated. Please don't modify it!
-//
-package org.opencv.%(m)s;
-
-$imports
-
-// C++: class %(c)s
-//javadoc: %(c)s
-public class %(jc)s extends %(base)s {
-
- protected %(jc)s(long addr) { super(addr); }
-
-""" % { 'm' : self.module, 'c' : class_name, 'jc' : jname, 'base' : cls_base })
- else: # not cls_base
- self.java_code[class_name]["j_code"].write("""
-//
-// This file is auto-generated. Please don't modify it!
-//
-package org.opencv.%(m)s;
-
-$imports
-
-// C++: class %(c)s
-//javadoc: %(c)s
-public class %(jc)s {
-
- protected final long nativeObj;
- protected %(jc)s(long addr) { nativeObj = addr; }
-
-""" % { 'm' : self.module, 'c' : class_name, 'jc' : jname })
- else: # class_name == self.Module
- self.java_code[class_name]["j_code"].write("""
-//
-// This file is auto-generated. Please don't modify it!
-//
-package org.opencv.%(m)s;
-
-$imports
-
-public class %(jc)s {
-""" % { 'm' : self.module, 'jc' : jname } )
-
- if class_name == 'Core':
- (major, minor, revision, status) = getLibVersion(
- (os.path.dirname(__file__) or '.') + '/../../core/include/opencv2/core/version.hpp')
- version_str = '.'.join( (major, minor, revision) ) + status
- version_suffix = ''.join( (major, minor, revision) )
- self.classes[class_name].imports.add("java.lang.String")
- self.java_code[class_name]["j_code"].write("""
- // these constants are wrapped inside functions to prevent inlining
- private static String getVersion() { return "%(v)s"; }
- private static String getNativeLibraryName() { return "opencv_java%(vs)s"; }
- private static int getVersionMajor() { return %(ma)s; }
- private static int getVersionMinor() { return %(mi)s; }
- private static int getVersionRevision() { return %(re)s; }
- private static String getVersionStatus() { return "%(st)s"; }
-
- public static final String VERSION = getVersion();
- public static final String NATIVE_LIBRARY_NAME = getNativeLibraryName();
- public static final int VERSION_MAJOR = getVersionMajor();
- public static final int VERSION_MINOR = getVersionMinor();
- public static final int VERSION_REVISION = getVersionRevision();
- public static final String VERSION_STATUS = getVersionStatus();
-""" % { 'v' : version_str, 'vs' : version_suffix, 'ma' : major, 'mi' : minor, 're' : revision, 'st': status } )
-
def add_class(self, decl):
- logging.info("class: %s", decl)
classinfo = ClassInfo(decl, namespaces=self.namespaces)
if classinfo.name in class_ignore_list:
logging.info('ignored: %s', classinfo)
return
name = classinfo.name
- if name in self.classes:
- print("Generator error: class %s (%s) is duplicated" % \
- (name, classinfo.cname))
- logging.info('duplicated: %s', classinfo)
+ if self.isWrapped(name):
+ logging.warning('duplicated: %s', classinfo)
return
self.classes[name] = classinfo
if name in type_dict:
- print("Duplicated class: " + name)
- logging.info('duplicated: %s', classinfo)
+ logging.warning('duplicated: %s', classinfo)
return
- if '/Simple' in decl[2]:
- self.classes_simple.append(name)
- if ('/Map' in decl[2]):
- self.classes_map.append(name)
- #adding default c-tor
- ffi = FuncFamilyInfo(['cv.'+name+'.'+name, '', [], []], namespaces=self.namespaces)
- classinfo.methods[ffi.jname] = ffi
type_dict[name] = \
{ "j_type" : classinfo.jname,
"jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),),
if True: #"vector" not in p[0]:
classinfo.props.append( ClassPropInfo(p) )
else:
- print("Skipped property: [%s]" % name, p)
+ # Let logging do the formatting: pre-formatting with % and then passing p
+ # as an extra argument made logging try to apply p to a message that has
+ # no placeholder left.
+ logging.warning("Skipped property: [%s] %s", name, p)
- self.add_class_code_stream(name, classinfo.base)
if classinfo.base:
- self.get_imports(name, classinfo.base)
+ classinfo.addImports(classinfo.base)
type_dict["Ptr_"+name] = \
{ "j_type" : name,
"jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),),
logging.info('ok: %s', classinfo)
def add_const(self, decl): # [ "const cname", val, [], [] ]
- logging.info("constant: %s", decl)
constinfo = ConstInfo(decl, namespaces=self.namespaces)
- for c in const_ignore_list:
- if re.match(c, constinfo.name):
- logging.info('ignored: %s', constinfo)
- return
- # class member?
- if len(constinfo.classname) == 0:
- constinfo.classname = self.Module
- if constinfo.classname not in self.classes:
- # this class isn't wrapped
- # skipping this const
+ if constinfo.isIgnored():
+ logging.info('ignored: %s', constinfo)
+ elif not self.isWrapped(constinfo.classname):
logging.info('class not found: %s', constinfo)
- return
-
- consts = self.classes[constinfo.classname].consts
- for c in const_private_list:
- if re.match(c, constinfo.name):
- consts = self.classes[constinfo.classname].private_consts
- break
-
- # checking duplication
- for list in self.classes[constinfo.classname].consts, self.classes[constinfo.classname].private_consts:
- for c in list:
- if c.name == constinfo.name:
- if c.addedManually:
- logging.info('manual: %s', constinfo)
- return
- print("Generator error: constant %s (%s) is duplicated" \
- % (constinfo.name, constinfo.cname))
- logging.info('duplicated: %s', constinfo)
- return
-
- consts.append(constinfo)
- logging.info('ok: %s', constinfo)
+ else:
+ ci = self.getClass(constinfo.classname)
+ duplicate = ci.getConst(constinfo.name)
+ if duplicate:
+ if duplicate.addedManually:
+ logging.info('manual: %s', constinfo)
+ else:
+ logging.warning('duplicated: %s', constinfo)
+ else:
+ ci.addConst(constinfo)
+ logging.info('ok: %s', constinfo)
def add_func(self, decl):
- logging.info("function: %s", decl)
- ffi = FuncFamilyInfo(decl, namespaces=self.namespaces)
- classname = ffi.funcs[0].classname or self.Module
+ fi = FuncInfo(decl, namespaces=self.namespaces)
+ classname = fi.classname or self.Module
if classname in class_ignore_list:
- logging.info('ignored: %s', ffi)
- return
- if classname in ManualFuncs and ffi.jname in ManualFuncs[classname]:
- logging.info('manual: %s', ffi)
- return
- if classname not in self.classes:
- print("Generator error: the class %s for method %s is missing" % \
- (classname, ffi.jname))
- logging.info('not found: %s', ffi)
- return
- func_map = self.classes[classname].methods
- if ffi.jname in func_map:
- func_map[ffi.jname].add_func(ffi.funcs[0])
+ logging.info('ignored: %s', fi)
+ elif classname in ManualFuncs and fi.jname in ManualFuncs[classname]:
+ logging.info('manual: %s', fi)
+ elif not self.isWrapped(classname):
+ logging.warning('not found: %s', fi)
else:
- func_map[ffi.jname] = ffi
- # calc args with def val
- cnt = len([a for a in ffi.funcs[0].args if a.defval])
- self.def_args_hist[cnt] = self.def_args_hist.get(cnt, 0) + 1
- logging.info('ok: %s', ffi)
+ self.getClass(classname).addMethod(fi)
+ logging.info('ok: %s', fi)
+ # calc args with def val
+ cnt = len([a for a in fi.args if a.defval])
+ self.def_args_hist[cnt] = self.def_args_hist.get(cnt, 0) + 1
def save(self, path, buf):
f = open(path, "wt")
for hdr in srcfiles:
decls = parser.parse(hdr)
self.namespaces = parser.namespaces
- logging.info("=== Header: %s", hdr)
- logging.info("=== Namespaces: %s", parser.namespaces)
+ logging.info("\n\n===== Header: %s =====", hdr)
+ logging.info("Namespaces: %s", parser.namespaces)
for decl in decls:
+ logging.info("\n--- Incoming ---\n%s", pformat(decl, 4))
name = decl[0]
if name.startswith("struct") or name.startswith("class"):
self.add_class(decl)
else: # function
self.add_func(decl)
- self.cpp_code = StringIO()
- self.cpp_code.write(Template("""
-//
-// This file is auto-generated, please don't edit!
-//
-
-#define LOG_TAG "org.opencv.$m"
-
-#include "common.h"
-
-#include "opencv2/opencv_modules.hpp"
-#ifdef HAVE_OPENCV_$M
-
-#include <string>
-
-#include "opencv2/$m.hpp"
-
-using namespace cv;
-
-/// throw java exception
-static void throwJavaException(JNIEnv *env, const std::exception *e, const char *method) {
- std::string what = "unknown exception";
- jclass je = 0;
-
- if(e) {
- std::string exception_type = "std::exception";
-
- if(dynamic_cast<const cv::Exception*>(e)) {
- exception_type = "cv::Exception";
- je = env->FindClass("org/opencv/core/CvException");
- }
-
- what = exception_type + ": " + e->what();
- }
-
- if(!je) je = env->FindClass("java/lang/Exception");
- env->ThrowNew(je, what.c_str());
-
- LOGE("%s caught %s", method, what.c_str());
- (void)method; // avoid "unused" warning
-}
-
-
-extern "C" {
-
-""").substitute( m = module, M = module.upper() ) )
-
- # generate code for the classes
- for name in self.classes.keys():
- if name == "Mat":
+ logging.info("\n\n===== Generating... =====")
+ moduleCppCode = StringIO()
+ for ci in self.classes.values():
+ if ci.name == "Mat":
continue
- self.gen_class(name)
- # saving code streams
- imports = "\n".join([ "import %s;" % c for c in \
- sorted(self.classes[name].imports) if not c.startswith('org.opencv.'+self.module) ])
- self.java_code[name]["j_code"].write("\n\n%s\n}\n" % self.java_code[name]["jn_code"].getvalue())
- java_code = self.java_code[name]["j_code"].getvalue()
- java_code = Template(java_code).substitute(imports = imports)
- self.save("%s/%s+%s.java" % (output_path, module, self.classes[name].jname), java_code)
-
- self.cpp_code.write( '\n} // extern "C"\n\n#endif // HAVE_OPENCV_%s\n' % module.upper() )
- self.save(output_path+"/"+module+".cpp", self.cpp_code.getvalue())
-
- # report
+ ci.initCodeStreams(self.Module)
+ self.gen_class(ci)
+ classJavaCode = ci.generateJavaCode(self.module, self.Module)
+ self.save("%s/%s+%s.java" % (output_path, module, ci.jname), classJavaCode)
+ moduleCppCode.write(ci.generateCppCode())
+ ci.cleanupCodeStreams()
+ self.save(output_path+"/"+module+".cpp", Template(T_CPP_MODULE).substitute(m = module, M = module.upper(), code = moduleCppCode.getvalue()))
+ self.save(output_path+"/"+module+".txt", self.makeReport())
+
+ def makeReport(self):
+ '''
+ Returns string with generator report
+ '''
report = StringIO()
- report.write("PORTED FUNCs LIST (%i of %i):\n\n" % \
- (len(self.ported_func_list), len(self.ported_func_list)+ len(self.skipped_func_list))
- )
+ total_count = len(self.ported_func_list)+ len(self.skipped_func_list)
+ report.write("PORTED FUNCs LIST (%i of %i):\n\n" % (len(self.ported_func_list), total_count))
report.write("\n".join(self.ported_func_list))
- report.write("\n\nSKIPPED FUNCs LIST (%i of %i):\n\n" % \
- (len(self.skipped_func_list), len(self.ported_func_list)+ len(self.skipped_func_list))
- )
+ report.write("\n\nSKIPPED FUNCs LIST (%i of %i):\n\n" % (len(self.skipped_func_list), total_count))
report.write("".join(self.skipped_func_list))
-
for i in self.def_args_hist.keys():
report.write("\n%i def args - %i funcs" % (i, self.def_args_hist[i]))
-
- report.write("\n\nclass as MAP:\n\t" + "\n\t".join(self.classes_map))
- report.write("\n\nclass SIMPLE:\n\t" + "\n\t".join(self.classes_simple))
-
- self.save(output_path+"/"+module+".txt", report.getvalue())
-
- #print("Done %i of %i funcs." % (len(self.ported_func_list), len(self.ported_func_list)+ len(self.skipped_func_list)))
-
-
-
- def get_imports(self, scope_classname, ctype):
- imports = self.classes[scope_classname or self.Module].imports
- if ctype.startswith('vector_vector'):
- imports.add("org.opencv.core.Mat")
- imports.add("java.util.List")
- imports.add("org.opencv.utils.Converters")
- self.get_imports(scope_classname, ctype.replace('vector_vector', 'vector'))
- return
- if ctype.startswith('vector'):
- imports.add("org.opencv.core.Mat")
- if type_dict[ctype]['j_type'].startswith('MatOf'):
- imports.add("org.opencv.core." + type_dict[ctype]['j_type'])
- return
- else:
- imports.add("java.util.List")
- imports.add("org.opencv.utils.Converters")
- self.get_imports(scope_classname, ctype.replace('vector_', ''))
- return
- j_type = ''
- if ctype in type_dict:
- j_type = type_dict[ctype]['j_type']
- elif ctype in ("Algorithm"):
- j_type = ctype
- if j_type in ( "CvType", "Mat", "Point", "Point3", "Range", "Rect", "RotatedRect", "Scalar", "Size", "TermCriteria", "Algorithm" ):
- imports.add("org.opencv.core." + j_type)
- if j_type == 'String':
- imports.add("java.lang.String")
- return
+ return report.getvalue()
def fullTypeName(self, t):
- if t in self.classes:
- return self.classes[t].fullName(isCPP=True)
+ if self.isWrapped(t):
+ return self.getClass(t).fullName(isCPP=True)
else:
return t
- def gen_func(self, fi, prop_name=''):
- j_code = self.java_code[fi.classname or self.Module]["j_code"]
- jn_code = self.java_code[fi.classname or self.Module]["jn_code"]
- cpp_code = self.cpp_code
+ def gen_func(self, ci, fi, prop_name=''):
+ logging.info("%s", fi)
+ j_code = ci.j_code
+ jn_code = ci.jn_code
+ cpp_code = ci.cpp_code
# c_decl
# e.g: void add(Mat src1, Mat src2, Mat dst, Mat mask = Mat(), int dtype = -1)
msg = "// Return type '%s' is not supported, skipping the function\n\n" % fi.ctype
self.skipped_func_list.append(c_decl + "\n" + msg)
j_code.write( " "*4 + msg )
- print("SKIP:", c_decl.strip(), "\t due to RET type", fi.ctype)
+ logging.warning("SKIP:" + c_decl.strip() + "\t due to RET type" + fi.ctype)
return
for a in fi.args:
if a.ctype not in type_dict:
msg = "// Unknown type '%s' (%s), skipping the function\n\n" % (a.ctype, a.out or "I")
self.skipped_func_list.append(c_decl + "\n" + msg)
j_code.write( " "*4 + msg )
- print("SKIP:", c_decl.strip(), "\t due to ARG type", a.ctype, "/" + (a.out or "I"))
+ logging.warning("SKIP:" + c_decl.strip() + "\t due to ARG type" + a.ctype + "/" + (a.out or "I"))
return
self.ported_func_list.append(c_decl)
# java args
args = fi.args[:] # copy
- suffix_counter = int( self.classes[fi.classname or self.Module].methods_suffixes.get(fi.jname, -1) )
+ suffix_counter = int(ci.methods_suffixes.get(fi.jname, -1))
while True:
suffix_counter += 1
- self.classes[fi.classname or self.Module].methods_suffixes[fi.jname] = suffix_counter
+ ci.methods_suffixes[fi.jname] = suffix_counter
# java native method args
jn_args = []
# jni (cpp) function args
# adding 'self'
jn_args.append ( ArgInfo([ "__int64", "nativeObj", "", [], "" ]) )
jni_args.append( ArgInfo([ "__int64", "self", "", [], "" ]) )
- self.get_imports(fi.classname, fi.ctype)
+ ci.addImports(fi.ctype)
for a in args:
if not a.ctype: # hidden
continue
- self.get_imports(fi.classname, a.ctype)
+ ci.addImports(a.ctype)
if "vector" in a.ctype: # pass as Mat
jn_args.append ( ArgInfo([ "__int64", "%s_mat.nativeObj" % a.name, "", [], "" ]) )
jni_args.append ( ArgInfo([ "__int64", "%s_mat_nativeObj" % a.name, "", [], "" ]) )
c_prologue.append( "Mat& %(n)s_mat = *((Mat*)%(n)s_mat_nativeObj)" % {"n" : a.name} + ";" )
if "I" in a.out or not a.out:
if a.ctype.startswith("vector_vector_"):
- self.classes[fi.classname or self.Module].imports.add("java.util.ArrayList")
j_prologue.append( "List<Mat> %(n)s_tmplm = new ArrayList<Mat>((%(n)s != null) ? %(n)s.size() : 0);" % {"n" : a.name } )
j_prologue.append( "Mat %(n)s_mat = Converters.%(t)s_to_Mat(%(n)s, %(n)s_tmplm);" % {"n" : a.name, "t" : a.ctype} )
else:
c_epilogue.append( "%(t)s_to_Mat( %(n)s, %(n)s_mat );" % {"n" : a.name, "t" : a.ctype} )
else:
fields = type_dict[a.ctype].get("jn_args", ((a.ctype, ""),))
- if "I" in a.out or not a.out or a.ctype in self.classes: # input arg, pass by primitive fields
+ if "I" in a.out or not a.out or self.isWrapped(a.ctype): # input arg, pass by primitive fields
for f in fields:
jn_args.append ( ArgInfo([ f[0], a.name + f[1], "", [], "" ]) )
jni_args.append( ArgInfo([ f[0], a.name + f[1].replace(".","_").replace("[","").replace("]",""), "", [], "" ]) )
- if a.out and a.ctype not in self.classes: # out arg, pass as double[]
+ if a.out and not self.isWrapped(a.ctype): # out arg, pass as double[]
jn_args.append ( ArgInfo([ "double[]", "%s_out" % a.name, "", [], "" ]) )
jni_args.append ( ArgInfo([ "double[]", "%s_out" % a.name, "", [], "" ]) )
j_prologue.append( "double[] %s_out = new double[%i];" % (a.name, len(fields)) )
else:
ret_val = "Mat retValMat = new Mat("
j_prologue.append( j_type + ' retVal = new Array' + j_type+'();')
- self.classes[fi.classname or self.Module].imports.add('java.util.ArrayList')
j_epilogue.append('Converters.Mat_to_' + ret_type + '(retValMat, retVal);')
elif ret_type.startswith("Ptr_"):
ret_val = type_dict[fi.ctype]["j_type"] + " retVal = new " + type_dict[ret_type]["j_type"] + "("
ret_val = ""
ret = "return;"
elif ret_type == "": # c-tor
- if fi.classname and self.classes[fi.classname].base:
+ if fi.classname and ci.base:
ret_val = "super( "
tail = " )"
else:
ret_val = "nativeObj = "
ret = "return;"
- elif ret_type in self.classes: # wrapped class
- ret_val = type_dict[ret_type]["j_type"] + " retVal = new " + self.classes[ret_type].jname + "("
+ elif self.isWrapped(ret_type): # wrapped class
+ ret_val = type_dict[ret_type]["j_type"] + " retVal = new " + self.getClass(ret_type).jname + "("
tail = ")"
elif "jn_type" not in type_dict[ret_type]:
ret_val = type_dict[fi.ctype]["j_type"] + " retVal = new " + type_dict[ret_type]["j_type"] + "("
elif fi.ctype == "String":
ret = "return env->NewStringUTF(_retval_.c_str());"
default = 'return env->NewStringUTF("");'
- elif fi.ctype in self.classes: # wrapped class:
+ elif self.isWrapped(fi.ctype): # wrapped class:
ret = "return (jlong) new %s(_retval_);" % self.fullTypeName(fi.ctype)
elif fi.ctype.startswith('Ptr_'):
c_prologue.append("typedef Ptr<%s> %s;" % (self.fullTypeName(fi.ctype[4:]), fi.ctype))
ret = "return (jlong)(new %(ctype)s(_retval_));" % { 'ctype':fi.ctype }
- elif ret_type in self.classes: # pointer to wrapped class:
+ elif self.isWrapped(ret_type): # pointer to wrapped class:
ret = "return (jlong) _retval_;"
elif type_dict[fi.ctype]["jni_type"] == "jdoubleArray":
ret = "return _da_retval_;"
jni_name = a.defval
cvargs.append( type_dict[a.ctype].get("jni_name", jni_name) % {"n" : a.name})
if "vector" not in a.ctype :
- if ("I" in a.out or not a.out or a.ctype in self.classes) and "jni_var" in type_dict[a.ctype]: # complex type
+ if ("I" in a.out or not a.out or self.isWrapped(a.ctype)) and "jni_var" in type_dict[a.ctype]: # complex type
c_prologue.append(type_dict[a.ctype]["jni_var"] % {"n" : a.name} + ";")
- if a.out and "I" not in a.out and a.ctype not in self.classes and a.ctype:
+ if a.out and "I" not in a.out and not self.isWrapped(a.ctype) and a.ctype:
c_prologue.append("%s %s;" % (a.ctype, a.name))
rtype = type_dict[fi.ctype].get("jni_type", "jdoubleArray")
- clazz = self.Module
- if fi.classname:
- clazz = self.classes[fi.classname].jname
+ clazz = ci.jname
cpp_code.write ( Template( \
"""
JNIEXPORT $rtype JNICALL Java_org_opencv_${module}_${clazz}_$fname ($argst);
- def gen_class(self, name):
- # generate code for the class
- ci = self.classes[name]
+ def gen_class(self, ci):
+ logging.info("%s", ci)
# constants
if ci.private_consts:
- self.java_code[name]['j_code'].write("""
+ logging.info("%s", ci.private_consts)
+ ci.j_code.write("""
private static final int
%s;\n\n""" % (",\n"+" "*12).join(["%s = %s" % (c.name, c.value) for c in ci.private_consts])
)
if ci.consts:
- self.java_code[name]['j_code'].write("""
+ logging.info("%s", ci.consts)
+ ci.j_code.write("""
public static final int
%s;\n\n""" % (",\n"+" "*12).join(["%s = %s" % (c.name, c.value) for c in ci.consts])
)
- # c-tors
- fflist = sorted(ci.methods.items())
- for n, ffi in fflist:
- if ffi.isconstructor:
- for fi in ffi.funcs:
- fi.jname = ci.jname
- self.gen_func(fi)
- # other methods
- for n, ffi in fflist:
- if not ffi.isconstructor:
- for fi in ffi.funcs:
- self.gen_func(fi)
+ # methods
+ for fi in ci.getAllMethods():
+ self.gen_func(ci, fi)
# props
for pi in ci.props:
# getter
getter_name = ci.fullName() + ".get_" + pi.name
fi = FuncInfo( [getter_name, pi.ctype, [], []], self.namespaces ) # [ funcname, return_ctype, [modifiers], [args] ]
- self.gen_func(fi, pi.name)
+ self.gen_func(ci, fi, pi.name)
if pi.rw:
#setter
setter_name = ci.fullName() + ".set_" + pi.name
fi = FuncInfo( [ setter_name, "void", [], [ [pi.ctype, pi.name, "", [], ""] ] ], self.namespaces)
- self.gen_func(fi, pi.name)
+ self.gen_func(ci, fi, pi.name)
# manual ports
- if name in ManualFuncs:
- for func in ManualFuncs[name].keys():
- self.java_code[name]["j_code"].write ( ManualFuncs[name][func]["j_code"] )
- self.java_code[name]["jn_code"].write( ManualFuncs[name][func]["jn_code"] )
- self.cpp_code.write( ManualFuncs[name][func]["cpp_code"] )
+ if ci.name in ManualFuncs:
+ for func in ManualFuncs[ci.name].keys():
+ ci.j_code.write ( ManualFuncs[ci.name][func]["j_code"] )
+ ci.jn_code.write( ManualFuncs[ci.name][func]["jn_code"] )
+ ci.cpp_code.write( ManualFuncs[ci.name][func]["cpp_code"] )
- if name != self.Module:
+ if ci.name != self.Module:
# finalize()
- self.java_code[name]["j_code"].write(
+ ci.j_code.write(
"""
@Override
protected void finalize() throws Throwable {
}
""" )
- self.java_code[name]["jn_code"].write(
+ ci.jn_code.write(
"""
// native support for java finalize()
private static native void delete(long nativeObj);
""" )
# native support for java finalize()
- self.cpp_code.write( \
+ ci.cpp_code.write( \
"""
//
// native support for java finalize()
""" % {"module" : module, "cls" : self.smartWrap(ci.name, ci.fullName(isCPP=True)), "j_cls" : ci.jname.replace('_', '_1')}
)
+ def getClass(self, classname):
+ # Return the entry of self.classes registered under `classname`.
+ # A falsy classname (empty string / None) falls back to self.Module.
+ # NOTE(review): plain indexing, so an unregistered name presumably raises
+ # KeyError (assuming self.classes is a dict) - check with isWrapped() first.
+ return self.classes[classname or self.Module]
+
+ def isWrapped(self, classname):
+ # Tell whether `classname` has an entry in self.classes.
+ # A falsy classname is looked up as self.Module (same fallback as getClass),
+ # so an empty name reports True whenever the module class is registered.
+ name = classname or self.Module
+ return name in self.classes
+
def isSmartClass(self, classname):
'''
Check if class stores Ptr<T>* instead of T* in nativeObj field
'''
- return classname in self.classes and self.classes[classname].base
+ return self.isWrapped(classname) and self.classes[classname].base
def smartWrap(self, name, fullname):
'''
import hdr_parser
module = sys.argv[2]
srcfiles = sys.argv[3:]
- logging.basicConfig(filename='%s/%s.log' % (dstdir, module), filemode='w', level=logging.INFO)
+ logging.basicConfig(filename='%s/%s.log' % (dstdir, module), format=None, filemode='w', level=logging.INFO)
+ handler = logging.StreamHandler()
+ handler.setLevel(logging.WARNING)
+ logging.getLogger().addHandler(handler)
#print("Generating module '" + module + "' from headers:\n\t" + "\n\t".join(srcfiles))
generator = JavaWrapperGenerator()
generator.gen(srcfiles, module, dstdir)
if( !f )
CV_Error( CV_StsError, "" );
fseek( f, 0, SEEK_END );
- size = ftell( f );
+ size = (int)ftell( f );
fseek( f, 0, SEEK_SET );
size_t elements_read = fread( ptr, 1, size, f );
CV_Assert(elements_read == (size_t)(size));
Point2f warpPoint(const Point2f &pt, InputArray K, InputArray R, InputArray T);
virtual Rect buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray xmap, OutputArray ymap);
+ Rect buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap);
virtual Point warp(InputArray src, InputArray K, InputArray R, InputArray T, int interp_mode, int border_mode,
OutputArray dst);
extractor_ = sextractor_;
}
#else
+ (void)hess_thresh;
+ (void)num_octaves;
+ (void)num_layers;
+ (void)num_octaves_descr;
+ (void)num_layers_descr;
CV_Error( Error::StsNotImplemented, "OpenCV was built without SURF support" );
#endif
}
int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = tl_u + du;
+ float x_ = fma(u, scale, -ct[0]);
+ float ct1 = 1 - ct[2];
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
float v = tl_v + dv;
+ float y_ = fma(v, scale, -ct[1]);
- float x_ = u / scale - ct[0];
- float y_ = v / scale - ct[1];
-
- float ct1 = 1 - ct[2];
float x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * ct1));
float y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * ct1));
float z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * ct1));
- x /= z;
- y /= z;
+ if (z != 0)
+ x /= z, y /= z;
+ else
+ x = y = -1;
xmap[0] = x;
ymap[0] = y;
int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = (tl_u + du) * scale;
+ float x_, z_;
+ x_ = sincos(u, &z_);
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
- float v = tl_v + dv;
- float x, y;
-
- u /= scale;
- float x_, y_, z_;
- x_ = sincos(u, &z_);
- y_ = v / scale;
+ float y_ = (tl_v + dv) * scale;
- float z;
+ float x, y, z;
x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
int xmap_index = mad24(dv0, xmap_step, mad24(du, (int)sizeof(float), xmap_offset));
int ymap_index = mad24(dv0, ymap_step, mad24(du, (int)sizeof(float), ymap_offset));
+ float u = (tl_u + du) * scale;
+ float cosu, sinu = sincos(u, &cosu);
+
for (int dv = dv0, dv1 = min(rows, dv0 + rowsPerWI); dv < dv1; ++dv, xmap_index += xmap_step,
ymap_index += ymap_step)
{
__global float * xmap = (__global float *)(xmapptr + xmap_index);
__global float * ymap = (__global float *)(ymapptr + ymap_index);
- float u = tl_u + du;
- float v = tl_v + dv;
- float x, y;
-
- v /= scale;
- u /= scale;
+ float v = (tl_v + dv) * scale;
- float cosv, sinv = sincos(v, &cosv), cosu, sinu = sincos(u, &cosu);
+ float cosv, sinv = sincos(v, &cosv);
float x_ = sinv * sinu;
float y_ = -cosv;
float z_ = sinv * cosu;
- float z;
+ float x, y, z;
x = fma(ck_rinv[0], x_, fma(ck_rinv[1], y_, ck_rinv[2] * z_));
y = fma(ck_rinv[3], x_, fma(ck_rinv[4], y_, ck_rinv[5] * z_));
z = fma(ck_rinv[6], x_, fma(ck_rinv[7], y_, ck_rinv[8] * z_));
return uv;
}
+// Convenience overload without a translation argument: forwards to the
+// overload taking T, passing a 3x1 CV_32FC1 matrix of zeros as T.
+// Returns whatever that overload returns.
+Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, OutputArray xmap, OutputArray ymap)
+{
+ return buildMaps(src_size, K, R, Mat::zeros(3, 1, CV_32FC1), xmap, ymap);
+}
+
Rect PlaneWarper::buildMaps(Size src_size, InputArray K, InputArray R, InputArray T, OutputArray _xmap, OutputArray _ymap)
{
projector_.setCameraParams(K, R, T);
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
ocl::KernelArg::PtrReadOnly(uk_rinv), ocl::KernelArg::PtrReadOnly(ut),
- dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+ dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
if (k.run(2, globalsize, NULL, true))
UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
- ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale, rowsPerWI);
+ ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale, rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
if (k.run(2, globalsize, NULL, true))
UMat uxmap = xmap.getUMat(), uymap = ymap.getUMat(), uk_rinv = k_rinv.getUMat(ACCESS_READ);
k.args(ocl::KernelArg::WriteOnlyNoSize(uxmap), ocl::KernelArg::WriteOnly(uymap),
- ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, projector_.scale,
+ ocl::KernelArg::PtrReadOnly(uk_rinv), dst_tl.x, dst_tl.y, 1/projector_.scale,
rowsPerWI);
size_t globalsize[2] = { dsize.width, (dsize.height + rowsPerWI - 1) / rowsPerWI };
static int64 timeLimitDefault;
static unsigned int iterationsLimitDefault;
+ unsigned int minIters;
unsigned int nIters;
unsigned int currentIter;
unsigned int runsPerIteration;
+ unsigned int perfValidationStage;
performance_metrics metrics;
void validateMetrics();
#include "precomp.hpp"
+#include <map>
+#include <iostream>
+#include <fstream>
+
+#if defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
#ifdef HAVE_CUDA
#include "opencv2/core/cuda.hpp"
#endif
static bool param_collect_impl;
#endif
extern bool test_ipp_check;
+
#ifdef HAVE_CUDA
static int param_cuda_device;
#endif
-
#ifdef ANDROID
static int param_affinity_mask;
static bool log_power_checkpoints;
}
#endif
+static double perf_stability_criteria = 0.03; // 3%
+
namespace {
class PerfEnvironment: public ::testing::Environment
terminationReason = TERM_UNKNOWN;
}
+/*****************************************************************************************\
+* Performance validation results
+\*****************************************************************************************/
+
+static bool perf_validation_enabled = false;
+
+static std::string perf_validation_results_directory;
+static std::map<std::string, float> perf_validation_results;
+static std::string perf_validation_results_outfile;
+
+static double perf_validation_criteria = 0.03; // 3 %
+static double perf_validation_time_threshold_ms = 0.1;
+static int perf_validation_idle_delay_ms = 3000; // 3 sec
+
+// Replaces the contents of perf_validation_results with the entries read from
+// `fileName`. Each record is parsed as: a float value, one separator character
+// (skipped unchecked), then the rest of the line as the test name.
+// On a parse failure an error is printed to stdout and the function returns,
+// keeping the entries read so far; on success a summary line is printed.
+static void loadPerfValidationResults(const std::string& fileName)
+{
+ perf_validation_results.clear();
+ std::ifstream infile(fileName.c_str());
+ while (!infile.eof())
+ {
+ std::string name;
+ float value = 0;
+ if (!(infile >> value))
+ {
+ // Failing to read a value exactly at end-of-file is the normal loop exit.
+ if (infile.eof())
+ break; // it is OK
+ std::cout << "ERROR: Can't load performance validation results from " << fileName << "!" << std::endl;
+ return;
+ }
+ // Skip the single separator character between the value and the name.
+ infile.ignore(1);
+ if (!(std::getline(infile, name)))
+ {
+ std::cout << "ERROR: Can't load performance validation results from " << fileName << "!" << std::endl;
+ return;
+ }
+ if (!name.empty() && name[name.size() - 1] == '\r') // CRLF processing on Linux
+ name.resize(name.size() - 1);
+ // A name that occurs more than once keeps the value of its last record.
+ perf_validation_results[name] = value;
+ }
+ std::cout << "Performance validation results loaded from " << fileName << " (" << perf_validation_results.size() << " entries)" << std::endl;
+}
+
+// Records `value` for test `name` in the in-memory perf_validation_results map,
+// overwriting any value already stored under that name. Nothing is written to
+// disk here.
+static void savePerfValidationResult(const std::string& name, float value)
+{
+ perf_validation_results[name] = value;
+}
+
+// Writes every entry of perf_validation_results to the file
+// perf_validation_results_directory + perf_validation_results_outfile, one
+// "value;name" record per line (the layout loadPerfValidationResults() parses).
+// Does nothing when no output file name was configured. Failures to open or
+// write the file are reported on stdout instead of being silently ignored.
+static void savePerfValidationResults()
+{
+    if (perf_validation_results_outfile.empty())
+        return;
+
+    const std::string path = perf_validation_results_directory + perf_validation_results_outfile;
+    std::ofstream outfile(path.c_str());
+    if (!outfile.is_open())
+    {
+        std::cout << "ERROR: Can't save performance validation results to " << path << "!" << std::endl;
+        return;
+    }
+
+    std::map<std::string, float>::const_iterator i;
+    for (i = perf_validation_results.begin(); i != perf_validation_results.end(); ++i)
+    {
+        outfile << i->second << ';';
+        outfile << i->first << std::endl;
+    }
+    outfile.close();
+
+    // close() sets failbit if the final flush failed; earlier write errors are
+    // also still recorded in the stream state at this point.
+    if (outfile.fail())
+    {
+        std::cout << "ERROR: Can't save performance validation results to " << path << "!" << std::endl;
+        return;
+    }
+    std::cout << "Performance validation results saved (" << perf_validation_results.size() << " entries)" << std::endl;
+}
+
+// Google Test environment whose only job is to flush the collected
+// performance validation results when the environment is torn down.
+class PerfValidationEnvironment : public ::testing::Environment
+{
+public:
+ virtual ~PerfValidationEnvironment() {}
+ // Nothing to prepare before the tests run.
+ virtual void SetUp() {}
+
+ // Persist the results gathered during the run via savePerfValidationResults().
+ virtual void TearDown()
+ {
+ savePerfValidationResults();
+ }
+};
+
+
/*****************************************************************************************\
* ::perf::TestBase
"{ perf_list_impls |false |list available implementation variants and exit}"
"{ perf_run_cpu |false |deprecated, equivalent to --perf_impl=plain}"
"{ perf_strategy |default |specifies performance measuring strategy: default, base or simple (weak restrictions)}"
+ "{ perf_read_validation_results | |specifies file name with performance results from previous run}"
+ "{ perf_write_validation_results | |specifies file name to write performance validation results}"
#ifdef ANDROID
"{ perf_time_limit |6.0 |default time limit for a single test (in seconds)}"
"{ perf_affinity_mask |0 |set affinity mask for the main thread}"
}
#endif
+ {
+ const char* path = getenv("OPENCV_PERF_VALIDATION_DIR");
+ if (path)
+ perf_validation_results_directory = path;
+ }
+
+ std::string fileName_perf_validation_results_src = args.get<std::string>("perf_read_validation_results");
+ if (!fileName_perf_validation_results_src.empty())
+ {
+ perf_validation_enabled = true;
+ loadPerfValidationResults(perf_validation_results_directory + fileName_perf_validation_results_src);
+ }
+
+ perf_validation_results_outfile = args.get<std::string>("perf_write_validation_results");
+ if (!perf_validation_results_outfile.empty())
+ {
+ perf_validation_enabled = true;
+ ::testing::AddGlobalTestEnvironment(new PerfValidationEnvironment());
+ }
+
if (!args.check())
{
args.printErrors();
{
lastTime = totalTime = timeLimit = 0;
nIters = currentIter = runsPerIteration = 0;
+ minIters = param_min_samples;
verified = false;
+ perfValidationStage = 0;
}
#ifdef _MSC_VER
# pragma warning(pop)
has_next = false;
break;
}
- if (currentIter < param_min_samples)
+ if (currentIter < minIters)
{
has_next = true;
break;
calcMetrics();
- double criteria = 0.03; // 3%
if (fabs(metrics.mean) > 1e-6)
- has_next = metrics.stddev > criteria * fabs(metrics.mean);
+ has_next = metrics.stddev > perf_stability_criteria * fabs(metrics.mean);
else
has_next = true;
}
} while (false);
+ if (perf_validation_enabled && !has_next)
+ {
+ calcMetrics();
+ double median_ms = metrics.median * 1000.0f / metrics.frequency;
+
+ const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
+ std::string name = (test_info == 0) ? "" :
+ std::string(test_info->test_case_name()) + "--" + test_info->name();
+
+ if (!perf_validation_results.empty() && !name.empty())
+ {
+ std::map<std::string, float>::iterator i = perf_validation_results.find(name);
+ bool isSame = false;
+ bool found = false;
+ bool grow = false;
+ if (i != perf_validation_results.end())
+ {
+ found = true;
+ double prev_result = i->second;
+ grow = median_ms > prev_result;
+ isSame = fabs(median_ms - prev_result) <= perf_validation_criteria * fabs(median_ms);
+ if (!isSame)
+ {
+ if (perfValidationStage == 0)
+ {
+ printf("Performance is changed (samples = %d, median):\n %.2f ms (current)\n %.2f ms (previous)\n", (int)times.size(), median_ms, prev_result);
+ }
+ }
+ }
+ else
+ {
+ if (perfValidationStage == 0)
+ printf("New performance result is detected\n");
+ }
+ if (!isSame)
+ {
+ if (perfValidationStage < 2)
+ {
+ if (perfValidationStage == 0 && currentIter <= minIters * 3 && currentIter < nIters)
+ {
+ unsigned int new_minIters = std::max(minIters * 5, currentIter * 3);
+ printf("Increase minIters from %u to %u\n", minIters, new_minIters);
+ minIters = new_minIters;
+ has_next = true;
+ perfValidationStage++;
+ }
+ else if (found && currentIter >= nIters &&
+ median_ms > perf_validation_time_threshold_ms &&
+ (grow || metrics.stddev > perf_stability_criteria * fabs(metrics.mean)))
+ {
+ printf("Performance is unstable, it may be a result of overheat problems\n");
+ printf("Idle delay for %d ms... \n", perf_validation_idle_delay_ms);
+#if defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64
+ Sleep(perf_validation_idle_delay_ms);
+#else
+ usleep(perf_validation_idle_delay_ms * 1000);
+#endif
+ has_next = true;
+ minIters = std::min(minIters * 5, nIters);
+ // reset collected samples
+ currentIter = 0;
+ times.clear();
+ metrics.clear();
+ perfValidationStage += 2;
+ }
+ if (!has_next)
+ {
+ printf("Assume that current result is valid\n");
+ }
+ }
+ else
+ {
+ printf("Re-measured performance result: %.2f ms\n", median_ms);
+ }
+ }
+ }
+
+ if (!has_next && !name.empty())
+ {
+ savePerfValidationResult(name, (float)median_ms);
+ }
+ }
+
#ifdef ANDROID
if (log_power_checkpoints)
{
else if (getCurrentPerformanceStrategy() == PERF_STRATEGY_SIMPLE)
{
double mean = metrics.mean * 1000.0f / metrics.frequency;
+ double median = metrics.median * 1000.0f / metrics.frequency;
double stddev = metrics.stddev * 1000.0f / metrics.frequency;
double percents = stddev / mean * 100.f;
- printf("[ PERFSTAT ] (samples = %d, mean = %.2f, stddev = %.2f (%.1f%%))\n", (int)metrics.samples, mean, stddev, percents);
+ printf("[ PERFSTAT ] (samples = %d, mean = %.2f, median = %.2f, stddev = %.2f (%.1f%%))\n", (int)metrics.samples, mean, median, stddev, percents);
}
else
{
nmodes = nNewModes;
//make new mode if needed and exit
- if( !fitsPDF )
+ if( !fitsPDF && alphaT > 0.f )
{
// replace the weakest or add a new one
int mode = nmodes == nmixtures ? nmixtures-1 : nmodes++;
macro(ocv_videoio_configure_target)
if(APPLE)
- ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS)
- if(HAVE_OBJC_EXCEPTIONS)
- foreach(source ${OPENCV_MODULE_${the_module}_SOURCES})
- if("${source}" MATCHES "\\.mm$")
- get_source_file_property(flags "${source}" COMPILE_FLAGS)
- if(flags)
- set(flags "${_flags} -fobjc-exceptions")
- else()
- set(flags "-fobjc-exceptions")
- endif()
-
- set_source_files_properties("${source}" PROPERTIES COMPILE_FLAGS "${flags}")
- endif()
- endforeach()
- endif()
+ # add_apple_compiler_options() is a macro: its argument is substituted
+ # textually into OPENCV_MODULE_<arg>_SOURCES, so pass the module name itself.
+ # A literal "the_module" would select the undefined variable
+ # OPENCV_MODULE_the_module_SOURCES and no .mm file would get the flag.
+ add_apple_compiler_options(${the_module})
endif()
if(BUILD_SHARED_LIBS)
AVAssetWriterInput* mMovieWriterInput;
AVAssetWriterInputPixelBufferAdaptor* mMovieWriterAdaptor;
- unsigned char* imagedata;
NSString* path;
NSString* codec;
NSString* fileType;
CMFormatDescriptionRef format = [[ports objectAtIndex:0] formatDescription];
CGSize s1 = CMVideoFormatDescriptionGetPresentationDimensions(format, YES, YES);
- int width=(int)s1.width, height=(int)s1.height;
+ int w=(int)s1.width, h=(int)s1.height;
[localpool drain];
switch (property_id) {
case CV_CAP_PROP_FRAME_WIDTH:
- return width;
+ return w;
case CV_CAP_PROP_FRAME_HEIGHT:
- return height;
+ return h;
case CV_CAP_PROP_IOS_DEVICE_FOCUS:
return mCaptureDevice.focusMode;
// Failed
// connection.videoOrientation = AVCaptureVideoOrientationPortrait;
+ (void)captureOutput;
+ (void)connection;
CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
memcpy(imagedata, baseaddress, currSize);
if (image == NULL) {
- image = cvCreateImageHeader(cvSize(width,height), IPL_DEPTH_8U, 4);
+ image = cvCreateImageHeader(cvSize((int)width,(int)height), IPL_DEPTH_8U, 4);
}
- image->width =width;
- image->height = height;
+ image->width = (int)width;
+ image->height = (int)height;
image->nChannels = 4;
image->depth = IPL_DEPTH_8U;
image->widthStep = (int)rowBytes;
image->imageData = imagedata;
- image->imageSize = currSize;
+ image->imageSize = (int)currSize;
if (bgr_image == NULL) {
- bgr_image = cvCreateImageHeader(cvSize(width,height), IPL_DEPTH_8U, 3);
+ bgr_image = cvCreateImageHeader(cvSize((int)width,(int)height), IPL_DEPTH_8U, 3);
}
- bgr_image->width =width;
- bgr_image->height = height;
+ bgr_image->width = (int)width;
+ bgr_image->height = (int)height;
bgr_image->nChannels = 3;
bgr_image->depth = IPL_DEPTH_8U;
bgr_image->widthStep = (int)rowBytes;
bgr_image->imageData = bgr_imagedata;
- bgr_image->imageSize = currSize;
+ bgr_image->imageSize = (int)currSize;
cvCvtColor(image, bgr_image, CV_BGRA2BGR);
// iOS provides hardware accelerated rotation through AVCaptureConnection class
// I can't get it work.
if (bgr_image_r90 == NULL){
- bgr_image_r90 = cvCreateImage(cvSize(height, width), IPL_DEPTH_8U, 3);
+ bgr_image_r90 = cvCreateImage(cvSize((int)height, (int)width), IPL_DEPTH_8U, 3);
}
cvTranspose(bgr_image, bgr_image_r90);
cvFlip(bgr_image_r90, NULL, 1);
memcpy(imagedata, baseaddress, currSize);
if (image == NULL) {
- image = cvCreateImageHeader(cvSize(width,height), IPL_DEPTH_8U, 4);
+ image = cvCreateImageHeader(cvSize((int)width,(int)height), IPL_DEPTH_8U, 4);
}
- image->width =width;
- image->height = height;
+ image->width = (int)width;
+ image->height = (int)height;
image->nChannels = 4;
image->depth = IPL_DEPTH_8U;
- image->widthStep = rowBytes;
+ image->widthStep = (int)rowBytes;
image->imageData = imagedata;
- image->imageSize = currSize;
+ image->imageSize = (int)currSize;
if (bgr_image == NULL) {
- bgr_image = cvCreateImageHeader(cvSize(width,height), IPL_DEPTH_8U, 3);
+ bgr_image = cvCreateImageHeader(cvSize((int)width,(int)height), IPL_DEPTH_8U, 3);
}
- bgr_image->width =width;
- bgr_image->height = height;
+ bgr_image->width = (int)width;
+ bgr_image->height = (int)height;
bgr_image->nChannels = 3;
bgr_image->depth = IPL_DEPTH_8U;
- bgr_image->widthStep = rowBytes;
+ bgr_image->widthStep = (int)rowBytes;
bgr_image->imageData = bgr_imagedata;
- bgr_image->imageSize = currSize;
+ bgr_image->imageSize = (int)currSize;
cvCvtColor(image, bgr_image,CV_BGRA2BGR);
return 30.0; //TODO: Debugging
}
-double CvCaptureFile::getProperty(int property_id){
+double CvCaptureFile::getProperty(int /*property_id*/){
/*
if (mCaptureSession == nil) return 0;
return 1.0; //Debugging
}
-bool CvCaptureFile::setProperty(int property_id, double value) {
+bool CvCaptureFile::setProperty(int /*property_id*/, double /*value*/) {
/*
if (mCaptureSession == nil) return false;
// writer status check
if (![mMovieWriterInput isReadyForMoreMediaData] || mMovieWriter.status != AVAssetWriterStatusWriting ) {
NSLog(@"[mMovieWriterInput isReadyForMoreMediaData] Not ready for media data or ...");
- NSLog(@"mMovieWriter.status: %d. Error: %@", mMovieWriter.status, [mMovieWriter.error localizedDescription]);
+ NSLog(@"mMovieWriter.status: %d. Error: %@", (int)mMovieWriter.status, [mMovieWriter.error localizedDescription]);
[localpool drain];
return false;
}
//STUFF YOU CAN CHANGE
#ifdef _DEBUG
+#include <strsafe.h>
//change for verbose debug info
static bool gs_verbose = true;
{
[[NSNotificationCenter defaultCenter] removeObserver:self];
[[UIDevice currentDevice] endGeneratingDeviceOrientationNotifications];
+ [super dealloc];
}
- (void)deviceOrientationDidChange:(NSNotification*)notification
{
+ (void)notification;
UIDeviceOrientation orientation = [UIDevice currentDevice].orientation;
switch (orientation)
default:
break;
}
- NSLog(@"deviceOrientationDidChange: %d", orientation);
+ NSLog(@"deviceOrientationDidChange: %d", (int)orientation);
[self updateOrientation];
}
if ([device position] == desiredPosition) {
[self.captureSession beginConfiguration];
- NSError* error;
+ NSError* error = nil;
AVCaptureDeviceInput *input = [AVCaptureDeviceInput deviceInputWithDevice:device error:&error];
if (!input) {
NSLog(@"error creating input %@", [error localizedDescription]);
// support for autofocus
if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) {
- NSError *error = nil;
+ error = nil;
if ([device lockForConfiguration:&error]) {
device.focusMode = AVCaptureFocusModeContinuousAutoFocus;
[device unlockForConfiguration];
[super start];
if (self.recordVideo == YES) {
- NSError* error;
+ NSError* error = nil;
if ([[NSFileManager defaultManager] fileExistsAtPath:[self videoFileString]]) {
[[NSFileManager defaultManager] removeItemAtPath:[self videoFileString] error:&error];
}
- (void)captureOutput:(AVCaptureOutput *)captureOutput didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection *)connection
{
+ (void)captureOutput;
+ (void)connection;
if (self.delegate) {
// convert from Core Media to Core Video
}
// delegate image processing to the delegate
- cv::Mat image(height, width, format_opencv, bufferAddress, bytesPerRow);
+ cv::Mat image((int)height, (int)width, format_opencv, bufferAddress, bytesPerRow);
- cv::Mat* result = NULL;
CGImage* dstImage;
if ([self.delegate respondsToSelector:@selector(processImage:)]) {
// check if matrix data pointer or dimensions were changed by the delegate
bool iOSimage = false;
- if (height == image.rows && width == image.cols && format_opencv == image.type() && bufferAddress == image.data && bytesPerRow == image.step) {
+ if (height == (size_t)image.rows && width == (size_t)image.cols && format_opencv == image.type() && bufferAddress == image.data && bytesPerRow == image.step) {
iOSimage = true;
}
ALAssetsLibrary *library = [[ALAssetsLibrary alloc] init];
if ([library videoAtPathIsCompatibleWithSavedPhotosAlbum:[self videoFileURL]]) {
[library writeVideoAtPathToSavedPhotosAlbum:[self videoFileURL]
- completionBlock:^(NSURL *assetURL, NSError *error){}];
+ completionBlock:^(NSURL *assetURL, NSError *error){ (void)assetURL; (void)error; }];
}
}
However, opencv2.framework directory is erased and recreated on each run.
"""
-import glob, re, os, os.path, shutil, string, sys
+import glob, re, os, os.path, shutil, string, sys, exceptions, subprocess
+
+def execute(cmd):
+ # Run `cmd` through the shell (subprocess.call with shell=True) after echoing
+ # it to stderr, and raise Exception unless the command exits with status 0:
+ #   retcode < 0 -> "Child was terminated by signal:" with the negated code
+ #   retcode > 0 -> "Child returned:" with the code
+ #   OSError     -> "Execution failed:" with the original error
+ # NOTE(review): cmd is interpreted by the shell; callers build it with string
+ # formatting, so paths containing spaces or metacharacters are presumably
+ # not safe here - confirm against the callers.
+ try:
+ print >>sys.stderr, "Executing:", cmd
+ retcode = subprocess.call(cmd, shell=True)
+ if retcode < 0:
+ raise Exception("Child was terminated by signal:", -retcode)
+ elif retcode > 0:
+ raise Exception("Child returned:", retcode)
+ except OSError as e:
+ raise Exception("Execution failed:", e)
def build_opencv(srcroot, buildroot, target, arch):
"builds OpenCV for device or simulator"
# if cmake cache exists, just rerun cmake to update OpenCV.xcodeproj if necessary
if os.path.isfile(os.path.join(builddir, "CMakeCache.txt")):
- os.system("cmake %s ." % (cmakeargs,))
+ execute("cmake %s ." % (cmakeargs,))
else:
- os.system("cmake %s %s" % (cmakeargs, srcroot))
+ execute("cmake %s %s" % (cmakeargs, srcroot))
for wlib in [builddir + "/modules/world/UninstalledProducts/libopencv_world.a",
builddir + "/lib/Release/libopencv_world.a"]:
if os.path.isfile(wlib):
os.remove(wlib)
- os.system("xcodebuild IPHONEOS_DEPLOYMENT_TARGET=6.0 -parallelizeTargets ARCHS=%s -jobs 8 -sdk %s -configuration Release -target ALL_BUILD" % (arch, target.lower()))
- os.system("xcodebuild IPHONEOS_DEPLOYMENT_TARGET=6.0 ARCHS=%s -sdk %s -configuration Release -target install install" % (arch, target.lower()))
+ execute("xcodebuild IPHONEOS_DEPLOYMENT_TARGET=6.0 -parallelizeTargets ARCHS=%s -jobs 8 -sdk %s -configuration Release -target ALL_BUILD" % (arch, target.lower()))
+ execute("xcodebuild IPHONEOS_DEPLOYMENT_TARGET=6.0 ARCHS=%s -sdk %s -configuration Release -target install install" % (arch, target.lower()))
os.chdir(currdir)
def put_framework_together(srcroot, dstroot):
# make universal static lib
wlist = " ".join(["../build/" + t + "/lib/Release/libopencv_world.a" for t in targetlist])
- os.system("lipo -create " + wlist + " -o " + dstdir + "/opencv2")
+ execute("lipo -create " + wlist + " -o " + dstdir + "/opencv2")
# copy Info.plist
shutil.copyfile(tdir0 + "/ios/Info.plist", dstdir + "/Resources/Info.plist")
def build_framework(srcroot, dstroot):
"main function to do all the work"
- targets = ["iPhoneOS", "iPhoneOS", "iPhoneOS", "iPhoneSimulator", "iPhoneSimulator"]
- archs = ["armv7", "armv7s", "arm64", "i386", "x86_64"]
- for i in range(len(targets)):
- build_opencv(srcroot, os.path.join(dstroot, "build"), targets[i], archs[i])
+ targets = [("armv7", "iPhoneOS"),
+ ("armv7s", "iPhoneOS"),
+ ("arm64", "iPhoneOS"),
+ ("i386", "iPhoneSimulator"),
+ ("x86_64", "iPhoneSimulator")]
+ for t in targets:
+ build_opencv(srcroot, os.path.join(dstroot, "build"), t[1], t[0])
put_framework_together(srcroot, dstroot)
print "Usage:\n\t./build_framework.py <outputdir>\n\n"
sys.exit(0)
- build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../..")), os.path.abspath(sys.argv[1]))
+ try:
+ build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../..")), os.path.abspath(sys.argv[1]))
+ except Exception as e:
+ print >>sys.stderr, e
+ sys.exit(1)
set (CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
# Hidden visibility is required for cxx on iOS
-set (CMAKE_C_FLAGS "")
-set (CMAKE_CXX_FLAGS "-stdlib=libc++ -headerpad_max_install_names -fvisibility=hidden -fvisibility-inlines-hidden")
+set (no_warn "-Wno-unused-function -Wno-overloaded-virtual")
+set (CMAKE_C_FLAGS "${no_warn}")
+set (CMAKE_CXX_FLAGS "-stdlib=libc++ -fvisibility=hidden -fvisibility-inlines-hidden ${no_warn}")
set (CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -fomit-frame-pointer -ffast-math")