From b44b1ab47b791b8e61d922412bb054302dffbbab Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 4 Mar 2015 16:37:41 +0300 Subject: [PATCH] reduce matrix_reductions instantiates for tiny build --- modules/gpu/src/cuda/matrix_reductions.cu | 36 ++++ modules/gpu/src/matrix_reductions.cpp | 262 ++++++++++++++++++++++++++++-- 2 files changed, 281 insertions(+), 17 deletions(-) diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu index 745daca..6048d41 100644 --- a/modules/gpu/src/cuda/matrix_reductions.cu +++ b/modules/gpu/src/cuda/matrix_reductions.cu @@ -462,6 +462,7 @@ namespace sum } template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -485,8 +486,10 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -495,6 +498,7 @@ namespace sum template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask) @@ -504,6 +508,7 @@ namespace sum } template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -527,8 +532,10 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -537,6 +544,7 @@ namespace sum template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template struct Sqr : unary_function { @@ -553,6 +561,7 @@ namespace sum } template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -576,8 +585,10 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#ifndef OPENCV_TINY_GPU_MODULE template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); @@ -586,6 +597,7 @@ namespace sum template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); template void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask); +#endif } ///////////////////////////////////////////////////////////// @@ -773,12 +785,16 @@ namespace minMax } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#endif } ///////////////////////////////////////////////////////////// @@ -955,12 +971,16 @@ namespace minMaxLoc } template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifndef OPENCV_TINY_GPU_MODULE template void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#endif } ///////////////////////////////////////////////////////////// @@ -1079,12 +1099,16 @@ namespace countNonZero } template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); template int run(const PtrStepSzb src, PtrStep buf); +#endif template int run(const PtrStepSzb src, PtrStep buf); +#ifndef OPENCV_TINY_GPU_MODULE template int run(const PtrStepSzb src, PtrStep buf); +#endif } ////////////////////////////////////////////////////////////////////////////// @@ -1257,6 +1281,11 @@ namespace reduce funcs[op]((PtrStepSz) src, (D*) dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); + template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#else template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); @@ -1280,6 +1309,7 @@ namespace reduce template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); template void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#endif /////////////////////////////////////////////////////////// @@ -1338,6 +1368,11 @@ namespace reduce funcs[cn][op](src, dst, stream); } +#ifdef OPENCV_TINY_GPU_MODULE + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); + template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#else template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); @@ -1361,6 +1396,7 @@ namespace reduce template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); template void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#endif } #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 4e09246..c7a760c 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -261,6 +261,18 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::run, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, @@ -271,6 +283,7 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run}, {0, ::sum::run, ::sum::run, ::sum::run, ::sum::run} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -286,6 +299,8 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -307,6 +322,18 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runAbs, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, @@ -317,6 +344,7 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs}, {0, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs, ::sum::runAbs} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -332,6 +360,8 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -353,6 +383,18 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][5] = + { + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, ::sum::runSqr, 0, 0, 0}, + {0, 0, 0, 0, 0}, + }; +#else static const func_t funcs[7][5] = { {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, @@ -363,6 +405,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr}, {0, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr, ::sum::runSqr} }; +#endif CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); @@ -378,6 +421,8 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) buf.setTo(Scalar::all(0)); const func_t func = funcs[src.depth()][src.channels()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double result[4]; func(src, buf.data, result, mask); @@ -405,6 +450,18 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMax::run, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + 0/*::minMax::run*/, + ::minMax::run, + 0/*::minMax::run*/, + }; +#else static const func_t funcs[] = { ::minMax::run, @@ -413,8 +470,9 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ::minMax::run, ::minMax::run, ::minMax::run, - ::minMax::run + ::minMax::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -430,6 +488,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, buf); @@ -456,6 +516,18 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf) { typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + 0/*::minMaxLoc::run*/, + ::minMaxLoc::run, + 0/*::minMaxLoc::run*/, + }; +#else static const func_t funcs[] = { ::minMaxLoc::run, @@ -464,8 +536,9 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ::minMaxLoc::run, ::minMaxLoc::run, ::minMaxLoc::run, - ::minMaxLoc::run + ::minMaxLoc::run, }; +#endif CV_Assert( src.channels() == 1 ); CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) ); @@ -482,6 +555,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ensureSizeIsEnough(locbuf_size, CV_8U, locBuf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); double temp1, temp2; Point temp3, temp4; @@ -508,6 +583,18 @@ int cv::gpu::countNonZero(const GpuMat& src) int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) { typedef int (*func_t)(const PtrStepSzb src, PtrStep buf); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[] = + { + ::countNonZero::run, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + 0/*::countNonZero::run*/, + ::countNonZero::run, + 0/*::countNonZero::run*/, + }; +#else static const func_t funcs[] = { ::countNonZero::run, @@ -516,8 +603,9 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ::countNonZero::run, ::countNonZero::run, ::countNonZero::run, - ::countNonZero::run + ::countNonZero::run, }; +#endif CV_Assert(src.channels() == 1); @@ -532,6 +620,8 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ensureSizeIsEnough(buf_size, CV_8U, buf); const func_t func = funcs[src.depth()]; + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); return func(src, buf); } @@ -562,6 +652,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int if (dim == 0) { typedef void (*func_t)(PtrStepSzb src, void* dst, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + ::reduce::rows, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + ::reduce::rows, + 0/*::reduce::rows*/, + }, + { + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + 0/*::reduce::rows*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -571,7 +729,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -580,7 +738,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - 0/*::reduce::rows*/ + 0/*::reduce::rows*/, }, { 0/*::reduce::rows*/, @@ -589,7 +747,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -598,7 +756,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int ::reduce::rows, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -607,7 +765,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, ::reduce::rows, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -616,7 +774,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, ::reduce::rows, - ::reduce::rows + ::reduce::rows, }, { 0/*::reduce::rows*/, @@ -625,9 +783,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::rows*/, 0/*::reduce::rows*/, 0/*::reduce::rows*/, - ::reduce::rows + ::reduce::rows, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; @@ -639,6 +798,74 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int else { typedef void (*func_t)(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); +#ifdef OPENCV_TINY_GPU_MODULE + static const func_t funcs[7][7] = + { + { + ::reduce::cols, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + ::reduce::cols, + 0/*::reduce::cols*/, + }, + { + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + 0/*::reduce::cols*/, + } + }; +#else static const func_t funcs[7][7] = { { @@ -648,7 +875,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -657,7 +884,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - 0/*::reduce::cols*/ + 0/*::reduce::cols*/, }, { 0/*::reduce::cols*/, @@ -666,7 +893,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -675,7 +902,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int ::reduce::cols, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -684,7 +911,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, ::reduce::cols, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -693,7 +920,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, ::reduce::cols, - ::reduce::cols + ::reduce::cols, }, { 0/*::reduce::cols*/, @@ -702,9 +929,10 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int 0/*::reduce::cols*/, 0/*::reduce::cols*/, 0/*::reduce::cols*/, - ::reduce::cols + ::reduce::cols, } }; +#endif const func_t func = funcs[src.depth()][dst.depth()]; -- 2.7.4